From 292709b9cf3ba470af94b62c9bb60284cc581b79 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 7 May 2022 20:14:13 +0800 Subject: [PATCH 001/963] ASoC: fsl_micfil: explicitly clear software reset bit SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined as non volatile register, it still remain in regmap cache after set, then every update of REG_MICFIL_CTRL1, software reset happens. to avoid this, clear it explicitly. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1651925654-32060-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 9f90989ac59a..cb84d95c3aac 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -191,6 +191,17 @@ static int fsl_micfil_reset(struct device *dev) return ret; } + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + return 0; } From b776c4a4618ec1b5219d494c423dc142f23c4e8f Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 7 May 2022 20:14:14 +0800 Subject: [PATCH 002/963] ASoC: fsl_micfil: explicitly clear CHnF flags There may be failure when start 1 channel recording after 8 channels recording. The reason is that the CHnF flags are not cleared successfully by software reset. This issue is triggerred by the change of clearing software reset bit. CHnF flags are write 1 clear bits. Clear them by force write. Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1651925654-32060-2-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index cb84d95c3aac..d1cd104f8584 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -202,6 +202,14 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } From 698813ba8c580efb356ace8dbf55f61dac6063a8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 May 2022 14:41:36 +0100 Subject: [PATCH 003/963] ASoC: ops: Fix bounds check for _sx controls For _sx controls the semantics of the max field is not the usual one, max is the number of steps rather than the maximum value. This means that our check in snd_soc_put_volsw_sx() needs to just check against the maximum value. Fixes: 4f1e50d6a9cf9c1b ("ASoC: ops: Reject out of bounds values in snd_soc_put_volsw_sx()") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220511134137.169575-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index e693070f51fe..1ac7e2ce31a1 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -435,7 +435,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; From 97eea946b93961fffd29448dcda7398d0d51c4b2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 May 2022 14:41:37 +0100 Subject: [PATCH 004/963] ASoC: ops: Check bounds for second channel in snd_soc_put_volsw_sx() The bounds checks in snd_soc_put_volsw_sx() are only being applied to the first channel, meaning it is possible to write out of bounds values to the second channel in stereo controls. Add appropriate checks. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220511134137.169575-2-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 1ac7e2ce31a1..7cac26a64e0c 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -451,6 +451,12 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val_mask = mask << rshift; val2 = (ucontrol->value.integer.value[1] + min) & mask; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; + val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, From 19c5bda74dc45fee598a57600b550c9ea7662f10 Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Thu, 12 May 2022 15:46:40 +0800 Subject: [PATCH 005/963] ASoC: tlv320adc3xxx: Fix build error for implicit function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sound/soc/codecs/tlv320adc3xxx.c: In function ‘adc3xxx_i2c_probe’: sound/soc/codecs/tlv320adc3xxx.c:1359:21: error: implicit declaration of function ‘devm_gpiod_get’; did you mean ‘devm_gpio_free’? [-Werror=implicit-function-declaration] adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); ^~~~~~~~~~~~~~ devm_gpio_free CC [M] drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.o LD [M] sound/soc/codecs/snd-soc-ak4671.o LD [M] sound/soc/codecs/snd-soc-arizona.o LD [M] sound/soc/codecs/snd-soc-cros-ec-codec.o LD [M] sound/soc/codecs/snd-soc-ak4641.o LD [M] sound/soc/codecs/snd-soc-alc5632.o sound/soc/codecs/tlv320adc3xxx.c:1359:50: error: ‘GPIOD_OUT_LOW’ undeclared (first use in this function); did you mean ‘GPIOF_INIT_LOW’? adc3xxx->rst_pin = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); ^~~~~~~~~~~~~ GPIOF_INIT_LOW sound/soc/codecs/tlv320adc3xxx.c:1359:50: note: each undeclared identifier is reported only once for each function it appears in LD [M] sound/soc/codecs/snd-soc-cs35l32.o sound/soc/codecs/tlv320adc3xxx.c:1408:2: error: implicit declaration of function ‘gpiod_set_value_cansleep’; did you mean ‘gpio_set_value_cansleep’? [-Werror=implicit-function-declaration] gpiod_set_value_cansleep(adc3xxx->rst_pin, 1); ^~~~~~~~~~~~~~~~~~~~~~~~ gpio_set_value_cansleep LD [M] sound/soc/codecs/snd-soc-cs35l41-lib.o LD [M] sound/soc/codecs/snd-soc-cs35l36.o LD [M] sound/soc/codecs/snd-soc-cs35l34.o LD [M] sound/soc/codecs/snd-soc-cs35l41.o CC [M] drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.o cc1: all warnings being treated as errors Fixes: e9a3b57efd28 ("ASoC: codec: tlv320adc3xxx: New codec driver") Signed-off-by: Hui Tang Link: https://lore.kernel.org/r/20220512074640.75550-3-tanghui20@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320adc3xxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index ae18982ac310..104167c6fd93 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) From 3a5913183aa1b14148c723bda030e6102ad73008 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sun, 9 Oct 2022 22:16:43 +0300 Subject: [PATCH 006/963] xfrm: fix "disable_policy" on ipv4 early demux The commit in the "Fixes" tag tried to avoid a case where policy check is ignored due to dst caching in next hops. However, when the traffic is locally consumed, the dst may be cached in a local TCP or UDP socket as part of early demux. In this case the "disable_policy" flag is not checked as ip_route_input_noref() was only called before caching, and thus, packets after the initial packet in a flow will be dropped if not matching policies. Fix by checking the "disable_policy" flag also when a valid dst is already available. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216557 Reported-by: Monil Patel Fixes: e6175a2ed1f1 ("xfrm: fix "disable_policy" flag use when arriving from different devices") Signed-off-by: Eyal Birger ---- v2: use dev instead of skb->dev Signed-off-by: Steffen Klassert --- net/ipv4/ip_input.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b512390b3cf..e880ce77322a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -366,6 +366,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk, iph->tos, dev); if (unlikely(err)) goto drop_error; + } else { + struct in_device *in_dev = __in_dev_get_rcu(dev); + + if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) + IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID From d83f7040e18489265b4b121f33f99b02e52dabda Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Tue, 11 Oct 2022 11:01:37 +0300 Subject: [PATCH 007/963] xfrm: lwtunnel: squelch kernel warning in case XFRM encap type is not available Ido reported that a kernel warning [1] can be triggered from user space when the kernel is compiled with CONFIG_MODULES=y and CONFIG_XFRM=n when adding an xfrm encap type route, e.g: $ ip route add 198.51.100.0/24 dev dummy1 encap xfrm if_id 1 Error: lwt encapsulation type not supported. The reason for the warning is that the LWT infrastructure has an autoloading feature which is meant only for encap types that don't use a net device, which is not the case in xfrm encap. Mute this warning for xfrm encap as there's no encap module to autoload in this case. [1] WARNING: CPU: 3 PID: 2746262 at net/core/lwtunnel.c:57 lwtunnel_valid_encap_type+0x4f/0x120 [...] Call Trace: rtm_to_fib_config+0x211/0x350 inet_rtm_newroute+0x3a/0xa0 rtnetlink_rcv_msg+0x154/0x3c0 netlink_rcv_skb+0x49/0xf0 netlink_unicast+0x22f/0x350 netlink_sendmsg+0x208/0x440 ____sys_sendmsg+0x21f/0x250 ___sys_sendmsg+0x83/0xd0 __sys_sendmsg+0x54/0xa0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Reported-by: Ido Schimmel Fixes: 2c2493b9da91 ("xfrm: lwtunnel: add lwtunnel support for xfrm interfaces in collect_md mode") Signed-off-by: Eyal Birger Tested-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert --- net/core/lwtunnel.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 6fac2f0ef074..711cd3b4347a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -48,9 +48,11 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "RPL"; case LWTUNNEL_ENCAP_IOAM6: return "IOAM6"; + case LWTUNNEL_ENCAP_XFRM: + /* module autoload not supported for encap type */ + return NULL; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: - case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ From ac5408991ea6b06e29129b4d4861097c4c3e0d59 Mon Sep 17 00:00:00 2001 From: Aman Dhoot Date: Sat, 15 Oct 2022 20:41:17 -0700 Subject: [PATCH 008/963] Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode The device works fine in native RMI mode, there is no reason to use legacy PS/2 mode with it. Signed-off-by: Aman Dhoot Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index fa021af8506e..b0f776448a1c 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -192,6 +192,7 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ + "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; From a01aca4b05174b6dee2392ec44406f85e0f8bd46 Mon Sep 17 00:00:00 2001 From: Sheng-Liang Pan Date: Sun, 16 Oct 2022 21:47:46 -0700 Subject: [PATCH 009/963] dt-bindings: input: touchscreen: Add compatible for Goodix GT7986U chip Add a compatible for Goodix touch screen chip GT7986U which is is expected to be fully compatible with a driver written for GT7375P. Signed-off-by: Sheng-Liang Pan Reviewed-by: Douglas Anderson Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221006185333.v7.3.I52e4b4b20e2eb0ae20f2a9bb198aa6410f04cf16@changeid Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/goodix,gt7375p.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml b/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml index fe1c5016f7f3..1c191bc5a178 100644 --- a/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml +++ b/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml @@ -16,8 +16,11 @@ description: properties: compatible: - items: + oneOf: - const: goodix,gt7375p + - items: + - const: goodix,gt7986u + - const: goodix,gt7375p reg: enum: From ef40e88d1be172ca05e6e8cef00284a1cf4f43d8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 13 Oct 2022 14:33:35 -0700 Subject: [PATCH 010/963] arm64: dts: rockchip: Drop RK3399-Scarlet's repeated ec_ap_int_l definition This is repeated a few lines down. Signed-off-by: Brian Norris Link: https://lore.kernel.org/r/20221013213336.1779917-1-briannorris@chromium.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index ed3348b558f8..a47d9f758611 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -734,10 +734,6 @@ qca_bt: bluetooth@1 { }; /* PINCTRL OVERRIDES */ -&ec_ap_int_l { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_up>; -}; - &ap_fw_wp { rockchip,pins = <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>; }; From adbab347ec8861aa80d850693df3cd005ec65a99 Mon Sep 17 00:00:00 2001 From: Furkan Kardame Date: Mon, 10 Oct 2022 22:01:40 +0300 Subject: [PATCH 011/963] arm64: dts: rockchip: Fix gmac failure of rgmii-id from rk3566-roc-pc Lan does not work on rgmii-id, most rk356x devices lan is being switched to rgmii. Signed-off-by: Furkan Kardame Link: https://lore.kernel.org/r/20221010190142.18340-2-f.kardame@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index dba648c2f57e..72c91d2fcd26 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -142,7 +142,7 @@ &gmac1 { assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>; assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>; clock_in_out = "input"; - phy-mode = "rgmii-id"; + phy-mode = "rgmii"; phy-supply = <&vcc_3v3>; pinctrl-names = "default"; pinctrl-0 = <&gmac1m0_miim From 2440ad0d851e404adcd1b9ad758f28bd59365bae Mon Sep 17 00:00:00 2001 From: Furkan Kardame Date: Mon, 10 Oct 2022 22:01:41 +0300 Subject: [PATCH 012/963] arm64: dts: rockchip: Fix i2c3 pinctrl on rk3566-roc-pc As per device schematic i2c3 pinctrl is connected to m0 instead of m1 Signed-off-by: Furkan Kardame Link: https://lore.kernel.org/r/20221010190142.18340-3-f.kardame@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index 72c91d2fcd26..72809246c4c1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -432,7 +432,7 @@ &i2c2 { &i2c3 { pinctrl-names = "default"; - pinctrl-0 = <&i2c3m1_xfer>; + pinctrl-0 = <&i2c3m0_xfer>; status = "okay"; }; From b44bc59d0d279fa4f3dc11b895f2c8f77719885d Mon Sep 17 00:00:00 2001 From: Furkan Kardame Date: Mon, 10 Oct 2022 22:01:42 +0300 Subject: [PATCH 013/963] arm64: dts: rockchip: remove i2c5 from rk3566-roc-pc i2c5 is owned by hdmi port Signed-off-by: Furkan Kardame Link: https://lore.kernel.org/r/20221010190142.18340-4-f.kardame@manjaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index 72809246c4c1..9fd262334d77 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -436,10 +436,6 @@ &i2c3 { status = "okay"; }; -&i2c5 { - status = "okay"; -}; - &mdio1 { rgmii_phy1: ethernet-phy@0 { compatible = "ethernet-phy-ieee802.3-c22"; From 463be3cb357dab7d7e4d8dcc7c15c642e10c5bef Mon Sep 17 00:00:00 2001 From: Jensen Huang Date: Wed, 28 Sep 2022 17:11:29 +0800 Subject: [PATCH 014/963] arm64: dts: rockchip: add enable-strobe-pulldown to emmc phy on nanopi4 Internal pull-down for strobe line (GRF_EMMCPHY_CON2[9]) was disabled by commit 8b5c2b45b8f0, which causes I/O error in HS400 mode. Tested on NanoPC-T4. Fixes: 8b5c2b45b8f0 ("phy: rockchip: set pulldown for strobe line in dts") Signed-off-by: Jensen Huang Link: https://lore.kernel.org/r/20220928091129.20597-1-jensenhuang@friendlyarm.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index 278123b4f911..b6e082f1f6d9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -167,6 +167,7 @@ &cpu_l3 { }; &emmc_phy { + rockchip,enable-strobe-pulldown; status = "okay"; }; From 2dcd7e0c821fe9b663f7d3382b6d2faa8edf2129 Mon Sep 17 00:00:00 2001 From: Lev Popov Date: Mon, 26 Sep 2022 14:53:48 +0200 Subject: [PATCH 015/963] arm64: dts: rockchip: fix quartz64-a bluetooth configuration For "Quartz64 Model A" add missing RTS line to the UART interface used by bluetooth and swap bluetooth host-wakeup and device-wakeup gpio pins to match the boards physical layout. This changes are necessary to make bluetooth provided by the wireless module work. Also set max-speed on the bluetooth device as it's not automatically detected. Fixes: b33a22a1e7c4 ("arm64: dts: rockchip: add basic dts for Pine64 Quartz64-A") Signed-off-by: Lev Popov Link: https://lore.kernel.org/r/20220926125350.64783-1-leo@nabam.net Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index a05460b92415..25a8c781f4e7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -740,7 +740,7 @@ &uart0 { &uart1 { pinctrl-names = "default"; - pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn>; + pinctrl-0 = <&uart1m0_xfer &uart1m0_ctsn &uart1m0_rtsn>; status = "okay"; uart-has-rtscts; @@ -748,13 +748,14 @@ bluetooth { compatible = "brcm,bcm43438-bt"; clocks = <&rk817 1>; clock-names = "lpo"; - device-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; - host-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; + host-wakeup-gpios = <&gpio2 RK_PC1 GPIO_ACTIVE_HIGH>; + device-wakeup-gpios = <&gpio2 RK_PC0 GPIO_ACTIVE_HIGH>; shutdown-gpios = <&gpio2 RK_PB7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&bt_host_wake_l &bt_wake_l &bt_enable_h>; vbat-supply = <&vcc_sys>; vddio-supply = <&vcca1v8_pmu>; + max-speed = <3000000>; }; }; From 849c19d14940b87332d5d59c7fc581d73f2099fd Mon Sep 17 00:00:00 2001 From: FUKAUMI Naoki Date: Sat, 24 Sep 2022 11:28:12 +0000 Subject: [PATCH 016/963] arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi 4 series I2S1 pins are exposed on 40-pin header on Radxa ROCK Pi 4 series. their default function is GPIO, so I2S1 need to be disabled. Signed-off-by: FUKAUMI Naoki Link: https://lore.kernel.org/r/20220924112812.1219-1-naoki@radxa.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 645ced6617a6..1f76d3501bda 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -509,7 +509,6 @@ i2s0_p0_0: endpoint { &i2s1 { rockchip,playback-channels = <2>; rockchip,capture-channels = <2>; - status = "okay"; }; &i2s2 { From 40b21d466a86bd5b10d24f59746ed41283a9b3f6 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 19 Aug 2022 00:06:27 +0200 Subject: [PATCH 017/963] arm64: dts: qcom: ipq8074: correct APCS register space size APCS DTS addition that was merged, was not supposed to get merged as it was part of patch series that was superseded by 2 more patch series that resolved issues with this one and greatly simplified things. Since it already got merged, start by correcting the register space size as APCS will not be providing regmap for PLL and it will conflict with the standalone A53 PLL node. Fixes: 50ed9fffec3a ("arm64: dts: qcom: ipq8074: add APCS node") Signed-off-by: Robert Marko Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220818220628.339366-8-robimarko@gmail.com --- arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index a47acf9bdf24..a721cdd80489 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -668,7 +668,7 @@ watchdog: watchdog@b017000 { apcs_glb: mailbox@b111000 { compatible = "qcom,ipq8074-apcs-apps-global"; - reg = <0x0b111000 0x6000>; + reg = <0x0b111000 0x1000>; #clock-cells = <1>; #mbox-cells = <1>; From 20772f506fa4aab4d03035807f30eecee856e274 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Wed, 21 Sep 2022 12:52:58 +0300 Subject: [PATCH 018/963] Revert "arm64: dts: qcom: msm8996: add missing TCSR syscon compatible" This reverts commit 8a99e0fc8bd3 ("arm64: dts: qcom: msm8996: add missing TCSR syscon compatible"). This commit marked the saw3 (syscon@9a10000) node as compatible with qcom,tcsr-msm8996. However the mentioned device is not not a TCSR (system registers, hardware mutex). It is a CPU power controller/regulator, which is currently being handled as a syscon. Fixes: 8a99e0fc8bd3 ("arm64: dts: qcom: msm8996: add missing TCSR syscon compatible") Cc: Krzysztof Kozlowski Signed-off-by: Dmitry Baryshkov Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220921095258.2332568-1-dmitry.baryshkov@linaro.org --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index c0a2baffa49d..aba717644391 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -3504,7 +3504,7 @@ frame@98c0000 { }; saw3: syscon@9a10000 { - compatible = "qcom,tcsr-msm8996", "syscon"; + compatible = "syscon"; reg = <0x09a10000 0x1000>; }; From bd9f3dcf42d943b53190f99bcdbcfe98a56ac4cd Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:47 -0700 Subject: [PATCH 019/963] arm64: dts: qcom: sa8155p-adp: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. NOTE: while here, let's also remove the nonsensical "regulator-allow-set-load" on the fixed regulator "vreg_s4a_1p8". [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: 5b85e8f2225c ("arm64: dts: qcom: sa8155p-adp: Add base dts file") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.1.Id59c32b560c4662d8b3697de2bd494d08d654806@changeid --- arch/arm64/boot/dts/qcom/sa8155p-adp.dts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts index 87ab0e1ecd16..4dee790f1049 100644 --- a/arch/arm64/boot/dts/qcom/sa8155p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8155p-adp.dts @@ -43,7 +43,6 @@ vreg_s4a_1p8: smps4 { regulator-always-on; regulator-boot-on; - regulator-allow-set-load; vin-supply = <&vreg_3p3>; }; @@ -137,6 +136,9 @@ vreg_l5a_0p88: ldo5 { regulator-max-microvolt = <880000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7a_1p8: ldo7 { @@ -152,6 +154,9 @@ vreg_l10a_2p96: ldo10 { regulator-max-microvolt = <2960000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l11a_0p8: ldo11 { @@ -258,6 +263,9 @@ vreg_l5c_1p2: ldo5 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7c_1p8: ldo7 { @@ -273,6 +281,9 @@ vreg_l8c_1p2: ldo8 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l10c_3p3: ldo10 { From 09a1710b3e12e7ac8d871506bc395a9e8a0f63d6 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:48 -0700 Subject: [PATCH 020/963] arm64: dts: qcom: sa8295p-adp: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: 519183af39b2 ("arm64: dts: qcom: add SA8540P and ADP") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Konrad Dybcio Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.2.I430a56702ab0af65244e62667bb7743107de0c96@changeid --- arch/arm64/boot/dts/qcom/sa8295p-adp.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts index b608b82dff03..2c62ba6a49c5 100644 --- a/arch/arm64/boot/dts/qcom/sa8295p-adp.dts +++ b/arch/arm64/boot/dts/qcom/sa8295p-adp.dts @@ -83,6 +83,9 @@ vreg_l3c: ldo3 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l4c: ldo4 { @@ -98,6 +101,9 @@ vreg_l6c: ldo6 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7c: ldo7 { @@ -113,6 +119,9 @@ vreg_l10c: ldo10 { regulator-max-microvolt = <2504000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l17c: ldo17 { @@ -121,6 +130,9 @@ vreg_l17c: ldo17 { regulator-max-microvolt = <2504000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; }; From a4543e21ae363f4f094fb3c3503d77029e0c5d90 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:49 -0700 Subject: [PATCH 021/963] arm64: dts: qcom: sc8280xp-crd: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: ccd3517faf18 ("arm64: dts: qcom: sc8280xp: Add reference device") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Konrad Dybcio Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.3.Ie7d2c50d2b42ef2d364f3a0c8e300e5ce1875b79@changeid --- arch/arm64/boot/dts/qcom/sc8280xp-crd.dts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts index fea7d8273ccd..5e30349efd20 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts +++ b/arch/arm64/boot/dts/qcom/sc8280xp-crd.dts @@ -124,6 +124,9 @@ vreg_l7c: ldo7 { regulator-max-microvolt = <2504000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l13c: ldo13 { @@ -146,6 +149,9 @@ vreg_l3d: ldo3 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l4d: ldo4 { From aa30e786202e4ed1df980442d305658441f65859 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:50 -0700 Subject: [PATCH 022/963] arm64: dts: qcom: sm8150-xperia-kumano: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: d0a6ce59ea4e ("arm64: dts: qcom: sm8150: Add support for SONY Xperia 1 / 5 (Kumano platform)") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.4.I51d60414a42ba9e3008e208d60a04c9ffc425fa7@changeid --- arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi index 014fe3a31548..fb6e5a140c9f 100644 --- a/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150-sony-xperia-kumano.dtsi @@ -348,6 +348,9 @@ vreg_l6c_2p9: ldo6 { regulator-max-microvolt = <2960000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7c_3p0: ldo7 { @@ -367,6 +370,9 @@ vreg_l9c_2p9: ldo9 { regulator-max-microvolt = <2960000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l10c_3p3: ldo10 { From b7870d460c05ce31e2311036d91de1e2e0b32cea Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:51 -0700 Subject: [PATCH 023/963] arm64: dts: qcom: sm8250-xperia-edo: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: 69cdb97ef652 ("arm64: dts: qcom: sm8250: Add support for SONY Xperia 1 II / 5 II (Edo platform)") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.5.Ie446d5183d8b1e9ec4e32228ca300e604e3315eb@changeid --- arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi index 549e0a2aa9fe..5428aab3058d 100644 --- a/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250-sony-xperia-edo.dtsi @@ -317,6 +317,9 @@ vreg_l6c_2p9: ldo6 { regulator-max-microvolt = <2960000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7c_2p85: ldo7 { @@ -339,6 +342,9 @@ vreg_l9c_2p9: ldo9 { regulator-max-microvolt = <2960000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l10c_3p3: ldo10 { From 1ce8aaf6abdc35cde555924418b3d4516b4ec871 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 29 Aug 2022 09:49:52 -0700 Subject: [PATCH 024/963] arm64: dts: qcom: sm8350-hdk: Specify which LDO modes are allowed This board uses RPMH, specifies "regulator-allow-set-load" for LDOs, but doesn't specify any modes with "regulator-allowed-modes". Prior to commit efb0cb50c427 ("regulator: qcom-rpmh: Implement get_optimum_mode(), not set_load()") the above meant that we were able to set either LPM or HPM mode. After that commit (and fixes [1]) we'll be stuck at the initial mode. Discussion of this has resulted in the decision that the old dts files were wrong and should be fixed to fully restore old functionality. Let's re-enable the old functionality by fixing the dts. [1] https://lore.kernel.org/r/20220824142229.RFT.v2.2.I6f77860e5cd98bf5c67208fa9edda4a08847c304@changeid Fixes: 9208c19f2124 ("arm64: dts: qcom: Introduce SM8350 HDK") Signed-off-by: Douglas Anderson Reviewed-by: Andrew Halaney Reviewed-by: Vinod Koul Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220829094903.v2.6.I6799be85cf36d3b494f803cba767a569080624f5@changeid --- arch/arm64/boot/dts/qcom/sm8350-hdk.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts index 0fcf5bd88fc7..69ae6503c2f6 100644 --- a/arch/arm64/boot/dts/qcom/sm8350-hdk.dts +++ b/arch/arm64/boot/dts/qcom/sm8350-hdk.dts @@ -107,6 +107,9 @@ vreg_l5b_0p88: ldo5 { regulator-max-microvolt = <888000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l6b_1p2: ldo6 { @@ -115,6 +118,9 @@ vreg_l6b_1p2: ldo6 { regulator-max-microvolt = <1208000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l7b_2p96: ldo7 { @@ -123,6 +129,9 @@ vreg_l7b_2p96: ldo7 { regulator-max-microvolt = <2504000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; vreg_l9b_1p2: ldo9 { @@ -131,6 +140,9 @@ vreg_l9b_1p2: ldo9 { regulator-max-microvolt = <1200000>; regulator-initial-mode = ; regulator-allow-set-load; + regulator-allowed-modes = + ; }; }; From 8d6b458ce6e93286a607e54f787f7a86067f58bd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 5 Oct 2022 16:33:05 +0200 Subject: [PATCH 025/963] arm64: dts: qcom: sc8280xp: fix ufs_card_phy ref clock The GCC_UFS_REF_CLKREF_CLK must be enabled or the second UFS controller fails to enumerate on sa8295p-adp. Note that the vendor kernel enables both GCC_UFS_REF_CLKREF_CLK and GCC_UFS_1_CARD_CLKREF_CLK and it is possible that the former should be modelled as a parent of the latter. The clock driver also has a GCC_UFS_CARD_CLKREF_CLK clock which the firmware appears to enable on the ADP. The usual lack of documentation for Qualcomm SoCs makes this a highly annoying guessing game, but as the second controller works on the ADP without either card reference clock enabled, only enable GCC_UFS_REF_CLKREF_CLK for now. Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Reviewed-by: Brian Masney Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221005143305.388-1-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index c32bcded2aef..4921bbaf5caf 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -959,7 +959,7 @@ ufs_card_phy: phy@1da7000 { ranges; clock-names = "ref", "ref_aux"; - clocks = <&gcc GCC_UFS_1_CARD_CLKREF_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_CARD_PHY_AUX_CLK>; resets = <&ufs_card_hc 0>; From f3aa975e230e060c07dcfdf3fe92b59809422c13 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Thu, 6 Oct 2022 10:55:29 -0400 Subject: [PATCH 026/963] arm64: dts: qcom: sc8280xp: correct ref clock for ufs_mem_phy The first UFS host controller fails to start on the SA8540P automotive board (QDrive3) due to the following errors: ufshcd-qcom 1d84000.ufs: ufshcd_query_flag: Sending flag query for idn 18 failed, err = 253 ufshcd-qcom 1d84000.ufs: ufshcd_query_flag: Sending flag query for idn 18 failed, err = 253 ufshcd-qcom 1d84000.ufs: ufshcd_query_flag: Sending flag query for idn 18 failed, err = 253 ufshcd-qcom 1d84000.ufs: ufshcd_query_flag_retry: query attribute, opcode 5, idn 18, failed with error 253 after 3 retries The system eventually fails to boot with the warning: gcc_ufs_phy_axi_clk status stuck at 'off' This issue can be worked around by adding clk_ignore_unused to the kernel command line since the system firmware sets up this clock for us. Let's fix this issue by updating the ref clock on ufs_mem_phy. Note that the downstream MSM 5.4 sources list this as ref_clk_parent. With this patch, the SA8540P is able to be booted without clk_ignore_unused. Signed-off-by: Brian Masney Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221006145529.755521-1-bmasney@redhat.com --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 4921bbaf5caf..f5824f44e15f 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -891,7 +891,7 @@ ufs_mem_phy: phy@1d87000 { ranges; clock-names = "ref", "ref_aux"; - clocks = <&rpmhcc RPMH_CXO_CLK>, + clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>; resets = <&ufs_mem_hc 0>; From 31b3b3059791be536e2ec0c6830767b596af340f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Sep 2022 11:44:51 +0200 Subject: [PATCH 027/963] arm64: dts: qcom: sc8280xp: fix USB0 PHY PCS_MISC registers The USB0 SS PHY node had the PCS_MISC register block (0x1200) replaced with PCS_USB (0x1700). Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220919094454.1574-2-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index f5824f44e15f..cab9965fe0f8 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1184,7 +1184,7 @@ usb_0_ssphy: usb3-phy@88eb400 { <0 0x088ec400 0 0x1f0>, <0 0x088eba00 0 0x100>, <0 0x088ebc00 0 0x3ec>, - <0 0x088ec700 0 0x64>; + <0 0x088ec200 0 0x18>; #phy-cells = <0>; #clock-cells = <0>; clocks = <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>; From 81cad26c6c3984d01b0612069c70ffd820f62dfa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Sep 2022 11:44:52 +0200 Subject: [PATCH 028/963] arm64: dts: qcom: sc8280xp: fix USB1 PHY RX1 registers The USB1 SS PHY node had the RX1 register block (0x600) replaced with RX2 (0xc00). Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220919094454.1574-3-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index cab9965fe0f8..fce05d69ffbe 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1242,7 +1242,7 @@ usb_1_qmpphy: phy-wrapper@8904000 { usb_1_ssphy: usb3-phy@8903400 { reg = <0 0x08903400 0 0x100>, - <0 0x08903c00 0 0x3ec>, + <0 0x08903600 0 0x3ec>, <0 0x08904400 0 0x1f0>, <0 0x08903a00 0 0x100>, <0 0x08903c00 0 0x3ec>, From 8723c3f290c7b798c0cbd89998576e6b365bda3a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Sep 2022 11:44:53 +0200 Subject: [PATCH 029/963] arm64: dts: qcom: sc8280xp: fix USB PHY PCS registers With the current binding, the PCS register block (0x1400) needs to include the PCS_USB registers (0x1700). Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220919094454.1574-4-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index fce05d69ffbe..2cf2b3d83631 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1181,7 +1181,7 @@ usb_0_qmpphy: phy-wrapper@88ec000 { usb_0_ssphy: usb3-phy@88eb400 { reg = <0 0x088eb400 0 0x100>, <0 0x088eb600 0 0x3ec>, - <0 0x088ec400 0 0x1f0>, + <0 0x088ec400 0 0x364>, <0 0x088eba00 0 0x100>, <0 0x088ebc00 0 0x3ec>, <0 0x088ec200 0 0x18>; @@ -1243,7 +1243,7 @@ usb_1_qmpphy: phy-wrapper@8904000 { usb_1_ssphy: usb3-phy@8903400 { reg = <0 0x08903400 0 0x100>, <0 0x08903600 0 0x3ec>, - <0 0x08904400 0 0x1f0>, + <0 0x08904400 0 0x364>, <0 0x08903a00 0 0x100>, <0 0x08903c00 0 0x3ec>, <0 0x08904200 0 0x18>; From 7cdfb7a54ac88f7cb6d830ebb78bdbcbcb44bb4c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Sep 2022 11:44:54 +0200 Subject: [PATCH 030/963] arm64: dts: qcom: sc8280xp: drop broken DP PHY nodes The DP PHY register layout of the current binding do not apply to the newer USB4/USB3/DP PHY which uses a different register layout entirely. Drop the DP PHY subnodes until the binding has been updated to prevent the driver from corrupting unrelated registers. Note that this is also needed in order to not break USB with an upcoming PHY driver change that checks for overlapping register regions. Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220919094454.1574-5-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 2cf2b3d83631..6ed4ab2c08b4 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -1191,16 +1191,6 @@ usb_0_ssphy: usb3-phy@88eb400 { clock-names = "pipe0"; clock-output-names = "usb0_phy_pipe_clk_src"; }; - - usb_0_dpphy: dp-phy@88ed200 { - reg = <0 0x088ed200 0 0x200>, - <0 0x088ed400 0 0x200>, - <0 0x088eda00 0 0x200>, - <0 0x088ea600 0 0x200>, - <0 0x088ea800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; usb_1_hsphy: phy@8902000 { @@ -1253,16 +1243,6 @@ usb_1_ssphy: usb3-phy@8903400 { clock-names = "pipe0"; clock-output-names = "usb1_phy_pipe_clk_src"; }; - - usb_1_dpphy: dp-phy@8904200 { - reg = <0 0x08904200 0 0x200>, - <0 0x08904400 0 0x200>, - <0 0x08904a00 0 0x200>, - <0 0x08904600 0 0x200>, - <0 0x08904800 0 0x200>; - #clock-cells = <1>; - #phy-cells = <0>; - }; }; system-cache-controller@9200000 { From 8703d55bd5eac642275fe91b34ac62ad0ad312b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 15 Sep 2022 16:16:01 +0200 Subject: [PATCH 031/963] arm64: dts: qcom: sc8280xp: fix UFS PHY serdes size The size of the UFS PHY serdes register region is 0x1c8 and the corresponding 'reg' property should specifically not include the adjacent regions that are defined in the child node (e.g. tx and rx). Fixes: 152d1faf1e2f ("arm64: dts: qcom: add SC8280XP platform") Signed-off-by: Johan Hovold Tested-by: Andrew Halaney #Qdrive3/sa8540p-adp-ride Reviewed-by: Brian Masney Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220915141601.18435-1-johan+linaro@kernel.org --- arch/arm64/boot/dts/qcom/sc8280xp.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi index 6ed4ab2c08b4..212d63d5cbf2 100644 --- a/arch/arm64/boot/dts/qcom/sc8280xp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8280xp.dtsi @@ -885,7 +885,7 @@ ufs_mem_hc: ufs@1d84000 { ufs_mem_phy: phy@1d87000 { compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01d87000 0 0xe10>; + reg = <0 0x01d87000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; @@ -953,7 +953,7 @@ ufs_card_hc: ufs@1da4000 { ufs_card_phy: phy@1da7000 { compatible = "qcom,sc8280xp-qmp-ufs-phy"; - reg = <0 0x01da7000 0 0xe10>; + reg = <0 0x01da7000 0 0x1c8>; #address-cells = <2>; #size-cells = <2>; ranges; From cb1d0aaa674e99957b85af570cb2730145af01df Mon Sep 17 00:00:00 2001 From: Satya Priya Date: Tue, 20 Sep 2022 17:18:15 +0530 Subject: [PATCH 032/963] arm64: dts: qcom: sc7280: Add the reset reg for lpass audiocc on SC7280 Add the reset register offset for clock gating. Fixes: 9499240d15f2 ("arm64: dts: qcom: sc7280: Add lpasscore & lpassaudio clock controllers") Signed-off-by: Satya Priya Reviewed-by: Neil Armstrong Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1663674495-25748-1-git-send-email-quic_c_skakit@quicinc.com --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 212580316d3e..4cdc88d33944 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -2296,7 +2296,8 @@ swr1: soundwire@3230000 { lpass_audiocc: clock-controller@3300000 { compatible = "qcom,sc7280-lpassaudiocc"; - reg = <0 0x03300000 0 0x30000>; + reg = <0 0x03300000 0 0x30000>, + <0 0x032a9000 0 0x1000>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&lpass_aon LPASS_AON_CC_MAIN_RCG_CLK_SRC>; clock-names = "bi_tcxo", "lpass_aon_cc_main_rcg_clk_src"; From ef80c95c29dc67c3034f32d93c41e2ede398e387 Mon Sep 17 00:00:00 2001 From: David Virag Date: Thu, 13 Oct 2022 17:13:40 +0200 Subject: [PATCH 033/963] clk: samsung: exynos7885: Correct "div4" clock parents "div4" DIVs which divide PLLs by 4 are actually dividing "div2" DIVs by 2 to achieve a by 4 division, thus their parents are the respective "div2" DIVs. These DIVs were mistakenly set to have the PLLs as parents. This leads to the kernel thinking "div4"s and everything under them run at 2x the clock speed. Fix this. Fixes: 45bd8166a1d8 ("clk: samsung: Add initial Exynos7885 clock driver") Signed-off-by: David Virag Acked-by: Chanwoo Choi Link: https://lore.kernel.org/r/20221013151341.151208-1-virag.david003@gmail.com Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-exynos7885.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ From 4b549ccce941798703f159b227aa28c716aa78fa Mon Sep 17 00:00:00 2001 From: Christian Langrock Date: Mon, 17 Oct 2022 08:34:47 +0200 Subject: [PATCH 034/963] xfrm: replay: Fix ESN wrap around for GSO When using GSO it can happen that the wrong seq_hi is used for the last packets before the wrap around. This can lead to double usage of a sequence number. To avoid this, we should serialize this last GSO packet. Fixes: d7dbefc45cf5 ("xfrm: Add xfrm_replay_overflow functions for offloading") Co-developed-by: Steffen Klassert Signed-off-by: Christian Langrock Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 3 +++ net/ipv6/esp6_offload.c | 3 +++ net/xfrm/xfrm_device.c | 15 ++++++++++++++- net/xfrm/xfrm_replay.c | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 170152772d33..3969fa805679 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -314,6 +314,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); ip_hdr(skb)->tot_len = htons(skb->len); diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 79d43548279c..242f4295940e 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -346,6 +346,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features xo->seq.low += skb_shinfo(skb)->gso_segs; } + if (xo->seq.low < seq) + xo->seq.hi++; + esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); len = skb->len - sizeof(struct ipv6hdr); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f5aafd418af..21269e8f2db4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -97,6 +97,18 @@ static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) } } +static inline bool xmit_xfrm_check_overflow(struct sk_buff *skb) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 seq = xo->seq.low; + + seq += skb_shinfo(skb)->gso_segs; + if (unlikely(seq < xo->seq.low)) + return true; + + return false; +} + struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features, bool *again) { int err; @@ -134,7 +146,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur return skb; } - if (skb_is_gso(skb) && unlikely(x->xso.dev != dev)) { + if (skb_is_gso(skb) && (unlikely(x->xso.dev != dev) || + unlikely(xmit_xfrm_check_overflow(skb)))) { struct sk_buff *segs; /* Packet got rerouted, fixup features and segment it. */ diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9f4d42eb090f..ce56d659c55a 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -714,7 +714,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff oseq += skb_shinfo(skb)->gso_segs; } - if (unlikely(oseq < replay_esn->oseq)) { + if (unlikely(xo->seq.low < replay_esn->oseq)) { XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; xo->seq.hi = oseq_hi; replay_esn->oseq_hi = oseq_hi; From 1498a5a79980fb86458297df2b4aaa9cc56503b3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 11 Oct 2022 10:07:44 -0400 Subject: [PATCH 035/963] dt-bindings: interconnect: qcom,msm8998-bwmon: Correct SC7280 CPU compatible Two different compatibles for SC7280 CPU BWMON instance were used in DTS and bindings. Correct the bindings to use the same one as in DTS, because it is more specific. Fixes: b7c84ae757c2 ("dt-bindings: interconnect: qcom,msm8998-bwmon: Add support for sc7280 BWMONs") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Rajendra Nayak Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221011140744.29829-1-krzysztof.kozlowski@linaro.org Signed-off-by: Georgi Djakov --- .../devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml index 2684562df4d9..be29e0b80995 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml @@ -24,7 +24,7 @@ properties: oneOf: - items: - enum: - - qcom,sc7280-bwmon + - qcom,sc7280-cpu-bwmon - qcom,sdm845-bwmon - const: qcom,msm8998-bwmon - const: qcom,msm8998-bwmon # BWMON v4 From 172c65e673cc3998059c9dfb3faf48e72e6fc236 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 15 Oct 2022 11:26:37 +0300 Subject: [PATCH 036/963] power: supply: rk817: check correct variable This code checks "->chg_ps" twice when it was supposed to check "->bat_ps". Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817") Signed-off-by: Dan Carpenter Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 635f051b0821..4e9c7b191266 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -1116,14 +1116,12 @@ static int rk817_charger_probe(struct platform_device *pdev) charger->bat_ps = devm_power_supply_register(&pdev->dev, &rk817_bat_desc, &pscfg); - - charger->chg_ps = devm_power_supply_register(&pdev->dev, - &rk817_chg_desc, &pscfg); - - if (IS_ERR(charger->chg_ps)) + if (IS_ERR(charger->bat_ps)) return dev_err_probe(dev, -EINVAL, "Battery failed to probe\n"); + charger->chg_ps = devm_power_supply_register(&pdev->dev, + &rk817_chg_desc, &pscfg); if (IS_ERR(charger->chg_ps)) return dev_err_probe(dev, -EINVAL, "Charger failed to probe\n"); From 883babd43dcf8b44db36d015a08cbf0066a5270d Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Mon, 10 Oct 2022 09:13:29 -0500 Subject: [PATCH 037/963] power: supply: rk817: Change rk817_chg_cur_to_reg to int Change return value of rk817_chg_cur_to_reg from u8 to int. If the function fails to find a suitable value it returns a -EINVAL, but defined as a u8 it would not return correctly. Additionally, change defined variable that stores return value from u8 to int. Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817") Signed-off-by: Chris Morgan Reported-by: Dan Carpenter Signed-off-by: Sebastian Reichel --- drivers/power/supply/rk817_charger.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c index 4e9c7b191266..f20a6ac584cc 100644 --- a/drivers/power/supply/rk817_charger.c +++ b/drivers/power/supply/rk817_charger.c @@ -121,7 +121,7 @@ struct rk817_charger { #define ADC_TO_CHARGE_UAH(adc_value, res_div) \ (adc_value / 3600 * 172 / res_div) -static u8 rk817_chg_cur_to_reg(u32 chg_cur_ma) +static int rk817_chg_cur_to_reg(u32 chg_cur_ma) { if (chg_cur_ma >= 3500) return CHG_3_5A; @@ -864,8 +864,8 @@ static int rk817_battery_init(struct rk817_charger *charger, { struct rk808 *rk808 = charger->rk808; u32 tmp, max_chg_vol_mv, max_chg_cur_ma; - u8 max_chg_vol_reg, chg_term_i_reg, max_chg_cur_reg; - int ret, chg_term_ma; + u8 max_chg_vol_reg, chg_term_i_reg; + int ret, chg_term_ma, max_chg_cur_reg; u8 bulk_reg[2]; /* Get initial plug state */ From 91e8b74fe6381e083f8aa55217bb0562785ab398 Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Wed, 19 Oct 2022 16:27:27 +0200 Subject: [PATCH 038/963] arm64: dts: rockchip: lower rk3399-puma-haikou SD controller clock frequency CRC errors (code -84 EILSEQ) have been observed for some SanDisk Ultra A1 cards when running at 50MHz. Waveform analysis suggest that the level shifters that are used on the RK3399-Q7 module for voltage translation between 3.0 and 3.3V don't handle clock rates at or above 48MHz properly. Back off to 40MHz for some safety margin. Cc: stable@vger.kernel.org Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Signed-off-by: Jakob Unterwurzacher Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20221019-upstream-puma-sd-40mhz-v1-0-754a76421518@theobroma-systems.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 04c752f49be9..115c14c0a3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -207,7 +207,7 @@ &sdmmc { cap-sd-highspeed; cd-gpios = <&gpio0 RK_PA7 GPIO_ACTIVE_LOW>; disable-wp; - max-frequency = <150000000>; + max-frequency = <40000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; vmmc-supply = <&vcc3v3_baseboard>; From 54de93cd8740d52a83728802b4270f953d1a636f Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 20 Oct 2022 17:06:45 +0200 Subject: [PATCH 039/963] kernel/utsname_sysctl.c: Add missing enum uts_proc value bfca3dd3d068 added new struct ctl_table uts_kern_table[], but not new enum uts_proc value. It broke the notification mechanism between the sethostname syscall and the pollers of /proc/sys/kernel/hostname. The table uts_kern_table is addressed within uts_proc_notify by the enum value, that's why new enum value is needed. Fixes: bfca3dd3d068 ("kernel/utsname_sysctl.c: print kernel arch") Reported-by: Torsten Hilbrich Signed-off-by: Petr Vorel Link: https://lore.kernel.org/r/20221020150645.11719-1-pvorel@suse.cz Signed-off-by: Greg Kroah-Hartman --- include/linux/utsname.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/utsname.h b/include/linux/utsname.h index 2b1737c9b244..bf7613ba412b 100644 --- a/include/linux/utsname.h +++ b/include/linux/utsname.h @@ -10,6 +10,7 @@ #include enum uts_proc { + UTS_PROC_ARCH, UTS_PROC_OSTYPE, UTS_PROC_OSRELEASE, UTS_PROC_VERSION, From 2b4337c8409b4e9e5aed15c597e4031dd567bdd8 Mon Sep 17 00:00:00 2001 From: Mihai Sain Date: Mon, 17 Oct 2022 11:31:19 +0300 Subject: [PATCH 040/963] ARM: dts: at91: sama7g5: fix signal name of pin PB2 The signal name of pin PB2 with function F is FLEXCOM11_IO1 as it is defined in the datasheet. Fixes: 7540629e2fc7 ("ARM: dts: at91: add sama7g5 SoC DT and sama7g5-ek") Signed-off-by: Mihai Sain Reviewed-by: Tudor Ambarus Acked-by: Nicolas Ferre Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221017083119.1643-1-mihai.sain@microchip.com --- arch/arm/boot/dts/sama7g5-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama7g5-pinfunc.h b/arch/arm/boot/dts/sama7g5-pinfunc.h index 4eb30445d205..6e87f0d4b8fc 100644 --- a/arch/arm/boot/dts/sama7g5-pinfunc.h +++ b/arch/arm/boot/dts/sama7g5-pinfunc.h @@ -261,7 +261,7 @@ #define PIN_PB2__FLEXCOM6_IO0 PINMUX_PIN(PIN_PB2, 2, 1) #define PIN_PB2__ADTRG PINMUX_PIN(PIN_PB2, 3, 1) #define PIN_PB2__A20 PINMUX_PIN(PIN_PB2, 4, 1) -#define PIN_PB2__FLEXCOM11_IO0 PINMUX_PIN(PIN_PB2, 6, 3) +#define PIN_PB2__FLEXCOM11_IO1 PINMUX_PIN(PIN_PB2, 6, 3) #define PIN_PB3 35 #define PIN_PB3__GPIO PINMUX_PIN(PIN_PB3, 0, 0) #define PIN_PB3__RF1 PINMUX_PIN(PIN_PB3, 1, 1) From efa17e90e1711bdb084e3954fa44afb6647331c0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 22 Oct 2022 15:42:12 +0800 Subject: [PATCH 041/963] iio: trigger: sysfs: fix possible memory leak in iio_sysfs_trig_init() dev_set_name() allocates memory for name, it need be freed when device_add() fails, call put_device() to give up the reference that hold in device_initialize(), so that it can be freed in kobject_cleanup() when the refcount hit to 0. Fault injection test can trigger this: unreferenced object 0xffff8e8340a7b4c0 (size 32): comm "modprobe", pid 243, jiffies 4294678145 (age 48.845s) hex dump (first 32 bytes): 69 69 6f 5f 73 79 73 66 73 5f 74 72 69 67 67 65 iio_sysfs_trigge 72 00 a7 40 83 8e ff ff 00 86 13 c4 f6 ee ff ff r..@............ backtrace: [<0000000074999de8>] __kmem_cache_alloc_node+0x1e9/0x360 [<00000000497fd30b>] __kmalloc_node_track_caller+0x44/0x1a0 [<000000003636c520>] kstrdup+0x2d/0x60 [<0000000032f84da2>] kobject_set_name_vargs+0x1e/0x90 [<0000000092efe493>] dev_set_name+0x4e/0x70 Fixes: 1f785681a870 ("staging:iio:trigger sysfs userspace trigger rework.") Signed-off-by: Yang Yingliang Cc: Link: https://lore.kernel.org/r/20221022074212.1386424-1-yangyingliang@huawei.com Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/iio-trig-sysfs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c index d6c5e9644738..6b05eed41612 100644 --- a/drivers/iio/trigger/iio-trig-sysfs.c +++ b/drivers/iio/trigger/iio-trig-sysfs.c @@ -203,9 +203,13 @@ static int iio_sysfs_trigger_remove(int id) static int __init iio_sysfs_trig_init(void) { + int ret; device_initialize(&iio_sysfs_trig_dev); dev_set_name(&iio_sysfs_trig_dev, "iio_sysfs_trigger"); - return device_add(&iio_sysfs_trig_dev); + ret = device_add(&iio_sysfs_trig_dev); + if (ret) + put_device(&iio_sysfs_trig_dev); + return ret; } module_init(iio_sysfs_trig_init); From 207777dc306a732cff76ab63bf19a7e0851410b3 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 20 Oct 2022 13:27:05 +0300 Subject: [PATCH 042/963] iio: adc: at91-sama5d2_adc: get rid of 5 degrees Celsius adjustment On SAMA7G5 final chip version there is no need for 5 degrees Celsius adjustment when computing junction temperature, thus, remove it. Fixes: 5ab38b81895c ("iio: adc: at91-sama5d2_adc: add support for temperature sensor") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221020102705.3639204-1-claudiu.beznea@microchip.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 33e251552214..870f4cb60923 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -2307,11 +2307,9 @@ static int at91_adc_temp_sensor_init(struct at91_adc_state *st, clb->p6 = buf[AT91_ADC_TS_CLB_IDX_P6]; /* - * We prepare here the conversion to milli and also add constant - * factor (5 degrees Celsius) to p1 here to avoid doing it on - * hotpath. + * We prepare here the conversion to milli to avoid doing it on hotpath. */ - clb->p1 = clb->p1 * 1000 + 5000; + clb->p1 = clb->p1 * 1000; free_buf: kfree(buf); From 937ec9f7d5f2625d60077bb7824fee35dc447c6e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 25 Oct 2022 14:21:50 +0200 Subject: [PATCH 043/963] staging: rtl8192e: remove bogus ssid character sign test This error triggers on some architectures with unsigned `char` types: drivers/staging/rtl8192e/rtllib_softmac_wx.c:459 rtllib_wx_set_essid() warn: impossible condition '(extra[i] < 0) => (0-255 < 0)' But actually, the entire test is bogus, as ssids don't have any sign validity rules like that. So just remove this check look all together. Cc: Greg Kroah-Hartman Cc: linux-staging@lists.linux.dev Signed-off-by: Jason A. Donenfeld Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20221025122150.583617-1-Jason@zx2c4.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8192e/rtllib_softmac_wx.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c index f9589c5b62ba..1e5ad3b476ef 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c +++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c @@ -439,7 +439,7 @@ int rtllib_wx_set_essid(struct rtllib_device *ieee, union iwreq_data *wrqu, char *extra) { - int ret = 0, len, i; + int ret = 0, len; short proto_started; unsigned long flags; @@ -455,13 +455,6 @@ int rtllib_wx_set_essid(struct rtllib_device *ieee, goto out; } - for (i = 0; i < len; i++) { - if (extra[i] < 0) { - ret = -1; - goto out; - } - } - if (proto_started) rtllib_stop_protocol(ieee, true); From 1683d3282f240336a2b4b6b541d435facfe8bbb6 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 25 Oct 2022 16:01:49 +0100 Subject: [PATCH 044/963] ASoC: dapm: Don't use prefix for regulator name When a component has a prefix, and uses a SND_SOC_DAPM_REGULATOR_SUPPLY, the name of the regulator should not use the prefix, otherwise it won't be properly matched in the DT/ACPI. Fixes: 3caac759681e ("ASoC: soc-dapm.c: fixup snd_soc_dapm_new_control_unlocked() error handling") Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20221025150149.113129-1-paul@crapouillou.net Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d515e7a78ea8..879cf1be67a9 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3645,7 +3645,7 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, switch (w->id) { case snd_soc_dapm_regulator_supply: - w->regulator = devm_regulator_get(dapm->dev, w->name); + w->regulator = devm_regulator_get(dapm->dev, widget->name); if (IS_ERR(w->regulator)) { ret = PTR_ERR(w->regulator); goto request_failed; From d40b6529c6269cd5afddb1116a383cab9f126694 Mon Sep 17 00:00:00 2001 From: Brent Mendelsohn Date: Mon, 24 Oct 2022 18:42:27 +0100 Subject: [PATCH 045/963] ASoC: amd: yc: Add Alienware m17 R5 AMD into DMI table This model requires an additional detection quirk to enable the internal microphone - BIOS doesn't seem to support AcpDmicConnected (nothing in acpidump output). Link: https://bugzilla.kernel.org/show_bug.cgi?id=216590 Signed-off-by: Brent Mendelsohn Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221024174227.4160-1-mendiebm@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 6c0f1de10429..d9715bea965e 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -206,6 +206,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "UM5302TA"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"), + } + }, {} }; From 8bb0ac0e6f64ebdf15d963c26b028de391c9bcf9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 25 Oct 2022 16:09:42 +0200 Subject: [PATCH 046/963] ASoC: Intel: bytcht_es8316: Add quirk for the Nanote UMPC-01 The Nanote UMPC-01 mini laptop has stereo speakers, while the default bytcht_es8316 settings assume a mono speaker setup. Add a quirk for this. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221025140942.509066-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcht_es8316.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index 6432b83f616f..a935c5fd9edb 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -443,6 +443,13 @@ static const struct dmi_system_id byt_cht_es8316_quirk_table[] = { | BYT_CHT_ES8316_INTMIC_IN2_MAP | BYT_CHT_ES8316_JD_INVERTED), }, + { /* Nanote UMPC-01 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"), + }, + .driver_data = (void *)BYT_CHT_ES8316_INTMIC_IN1_MAP, + }, { /* Teclast X98 Plus II */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TECLAST"), From 69d1abc0214e944dff1d30e201f8fc332a1adf1a Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Tue, 25 Oct 2022 10:49:29 +0800 Subject: [PATCH 047/963] MAINTAINERS: update Tzung-Bi's email address Use kernel.org account instead. Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20221025024929.2652134-1-tzungbi@kernel.org Signed-off-by: Mark Brown --- .mailmap | 1 + .../devicetree/bindings/sound/google,cros-ec-codec.yaml | 2 +- Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml | 2 +- MAINTAINERS | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 380378e2db36..84342d781407 100644 --- a/.mailmap +++ b/.mailmap @@ -414,6 +414,7 @@ TripleX Chung TripleX Chung Tsuneo Yoshioka Tycho Andersen +Tzung-Bi Shih Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml index c3e9f3485449..dea293f403d9 100644 --- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml +++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -8,7 +8,7 @@ title: Audio codec controlled by ChromeOS EC maintainers: - Cheng-Yi Chiang - - Tzung-Bi Shih + - Tzung-Bi Shih description: | Google's ChromeOS EC codec is a digital mic codec provided by the diff --git a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml index 1d73204451b1..ea7d4900ee4a 100644 --- a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml +++ b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Realtek rt1015p codec devicetree bindings maintainers: - - Tzung-Bi Shih + - Tzung-Bi Shih description: | Rt1015p is a rt1015 variant which does not support I2C and diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..f9749afc0b9d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4906,7 +4906,7 @@ F: drivers/platform/chrome/ CHROMEOS EC CODEC DRIVER M: Cheng-Yi Chiang -M: Tzung-Bi Shih +M: Tzung-Bi Shih R: Guenter Roeck L: chrome-platform@lists.linux.dev S: Maintained From df3414b0a245f43476061fddd78cee7d6cff797f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Monin?= Date: Thu, 13 Oct 2022 16:26:48 +0200 Subject: [PATCH 048/963] USB: serial: option: add Sierra Wireless EM9191 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the AT and diag ports, similar to other qualcomm SDX55 modems. In QDL mode, the modem uses a different device ID and support is provided by qcserial in commit 11c52d250b34 ("USB: serial: qcserial: add EM9191 QDL support"). T: Bus=08 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 3 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 P: Vendor=1199 ProdID=90d3 Rev=00.06 S: Manufacturer=Sierra Wireless, Incorporated S: Product=Sierra Wireless EM9191 S: SerialNumber=xxxxxxxxxxxxxxxx C: #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=896mA I: If#=0x0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0e Prot=00 Driver=cdc_mbim I: If#=0x1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I: If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) Signed-off-by: Benoît Monin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 697683e3fbff..37257a52287d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -581,6 +581,9 @@ static void option_instat_callback(struct urb *urb); #define OPPO_VENDOR_ID 0x22d9 #define OPPO_PRODUCT_R11 0x276c +/* Sierra Wireless products */ +#define SIERRA_VENDOR_ID 0x1199 +#define SIERRA_PRODUCT_EM9191 0x90d3 /* Device flags */ @@ -2176,6 +2179,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From 7f57f8165cb6d2c206e2b9ada53b9e2d6d8af42f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 25 Oct 2022 14:06:48 +0800 Subject: [PATCH 049/963] af_key: Fix send_acquire race with pfkey_register The function pfkey_send_acquire may race with pfkey_register (which could even be in a different name space). This may result in a buffer overrun. Allocating the maximum amount of memory that could be used prevents this. Reported-by: syzbot+1e9af9185d8850e2c2fa@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Herbert Xu Reviewed-by: Sabrina Dubroca Reviewed-by: Eric Dumazet Signed-off-by: Steffen Klassert --- net/key/af_key.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index c85df5b958d2..213287814328 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2905,7 +2905,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2923,7 +2923,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2934,16 +2934,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } -static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -2971,13 +2972,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } -static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) +static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; + int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); @@ -3019,8 +3024,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; + sz += sizeof(*c); } } + + return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) @@ -3150,6 +3158,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; + int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -3161,16 +3170,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) - size += count_ah_combs(t); + alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) - size += count_esp_combs(t); + alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } - skb = alloc_skb(size + 16, GFP_ATOMIC); + skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -3224,10 +3233,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ + alg_size = 0; if (x->id.proto == IPPROTO_AH) - dump_ah_combs(skb, t); + alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) - dump_esp_combs(skb, t); + alg_size = dump_esp_combs(skb, t); + + hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { From 6ec27c53886c8963729885bcf2dd996eba2767a7 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 28 Oct 2022 11:16:03 +0800 Subject: [PATCH 050/963] ASoC: core: Fix use-after-free in snd_soc_exit() KASAN reports a use-after-free: BUG: KASAN: use-after-free in device_del+0xb5b/0xc60 Read of size 8 at addr ffff888008655050 by task rmmod/387 CPU: 2 PID: 387 Comm: rmmod Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Call Trace: dump_stack_lvl+0x79/0x9a print_report+0x17f/0x47b kasan_report+0xbb/0xf0 device_del+0xb5b/0xc60 platform_device_del.part.0+0x24/0x200 platform_device_unregister+0x2e/0x40 snd_soc_exit+0xa/0x22 [snd_soc_core] __do_sys_delete_module.constprop.0+0x34f/0x5b0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd ... It's bacause in snd_soc_init(), snd_soc_util_init() is possble to fail, but its ret is ignored, which makes soc_dummy_dev unregistered twice. snd_soc_init() snd_soc_util_init() platform_device_register_simple(soc_dummy_dev) platform_driver_register() # fail platform_device_unregister(soc_dummy_dev) platform_driver_register() # success ... snd_soc_exit() snd_soc_util_exit() # soc_dummy_dev will be unregistered for second time To fix it, handle error and stop snd_soc_init() when util_init() fail. Also clean debugfs when util_init() or driver_register() fail. Fixes: fb257897bf20 ("ASoC: Work around allmodconfig failure") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221028031603.59416-1-chenzhongjin@huawei.com Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 12a82f5a3ff6..a409fbed8f34 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3477,10 +3477,23 @@ EXPORT_SYMBOL_GPL(snd_soc_of_get_dai_link_cpus); static int __init snd_soc_init(void) { - snd_soc_debugfs_init(); - snd_soc_util_init(); + int ret; - return platform_driver_register(&soc_driver); + snd_soc_debugfs_init(); + ret = snd_soc_util_init(); + if (ret) + goto err_util_init; + + ret = platform_driver_register(&soc_driver); + if (ret) + goto err_register; + return 0; + +err_register: + snd_soc_util_exit(); +err_util_init: + snd_soc_debugfs_exit(); + return ret; } module_init(snd_soc_init); From 6a564338a23cefcfc29c4a535b98402d13efdda6 Mon Sep 17 00:00:00 2001 From: Maarten Zanders Date: Fri, 28 Oct 2022 16:11:28 +0200 Subject: [PATCH 051/963] ASoC: fsl_asrc fsl_esai fsl_sai: allow CONFIG_PM=N When CONFIG_PM=N, pm_runtime_put_sync() returns -ENOSYS which breaks the probe function of these drivers. Other users of pm_runtime_put_sync() typically don't check the return value. In order to keep the program flow as intended, check for -ENOSYS. This commit is similar to commit 0434d3f (omap-mailbox.c). Fixes: cab04ab5900f ("ASoC: fsl_asrc: Don't use devm_regmap_init_mmio_clk") Fixes: 203773e39347 ("ASoC: fsl_esai: Don't use devm_regmap_init_mmio_clk") Fixes: 2277e7e36b4b ("ASoC: fsl_sai: Don't use devm_regmap_init_mmio_clk") Signed-off-by: Maarten Zanders Reviewed-by: Daniel Baluta Link: https://lore.kernel.org/r/20221028141129.100702-1-maarten.zanders@mind.be Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_asrc.c | 2 +- sound/soc/fsl/fsl_esai.c | 2 +- sound/soc/fsl/fsl_sai.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 936aef5d2767..e16e7b3fa96c 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -1232,7 +1232,7 @@ static int fsl_asrc_probe(struct platform_device *pdev) } ret = pm_runtime_put_sync(&pdev->dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; ret = devm_snd_soc_register_component(&pdev->dev, &fsl_asrc_component, diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c index 5c21fc490fce..17fefd27ec90 100644 --- a/sound/soc/fsl/fsl_esai.c +++ b/sound/soc/fsl/fsl_esai.c @@ -1069,7 +1069,7 @@ static int fsl_esai_probe(struct platform_device *pdev) regmap_write(esai_priv->regmap, REG_ESAI_RSMB, 0); ret = pm_runtime_put_sync(&pdev->dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; /* diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 81f89f6767a2..e60c7b344562 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1446,7 +1446,7 @@ static int fsl_sai_probe(struct platform_device *pdev) } ret = pm_runtime_put_sync(dev); - if (ret < 0) + if (ret < 0 && ret != -ENOSYS) goto err_pm_get_sync; /* From 39a654f39efb035b961359db91f15a347e8d7fd8 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Fri, 28 Oct 2022 21:30:29 +0530 Subject: [PATCH 052/963] MAINTAINERS: Add Manivannan Sadhasivam as Qcom PCIe RC maintainer Stan is moving out of mm-sol and decided not to carry on the maintainership duties of the Qcom PCIe RC driver. Since I'm already maintaining the Qcom PCIe EP driver, I'm volunteering myself to maintain the RC driver also. Link: https://lore.kernel.org/r/20221028160029.44483-1-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Bjorn Helgaas Cc: Stanimir Varbanov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..05ca3a7f6967 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16036,7 +16036,7 @@ F: Documentation/devicetree/bindings/pci/microchip* F: drivers/pci/controller/*microchip* PCIE DRIVER FOR QUALCOMM MSM -M: Stanimir Varbanov +M: Manivannan Sadhasivam L: linux-pci@vger.kernel.org L: linux-arm-msm@vger.kernel.org S: Maintained From cadaa773bcf161184fa428180516bae33a7bc667 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 27 Oct 2022 13:57:45 +0200 Subject: [PATCH 053/963] arm64: dts: qcom: sm8250: Disable the not yet supported cluster idle state To support the deeper cluster idle state for sm8250 platforms, some additional synchronization is needed between the rpmh-rsc device and the CPU cluster PM domain. Until that is supported, let's disable the cluster idle state. This fixes a problem that has been reported for the Qcom RB5 platform (see below), but most likely other sm8250 platforms suffers from similar issues, so let's make the fix generic for sm8250. vreg_l11c_3p3: failed to enable: -ETIMEDOUT qcom-rpmh-regulator 18200000.rsc:pm8150l-rpmh-regulators: ldo11: devm_regulator_register() failed, ret=-110 qcom-rpmh-regulator: probe of 18200000.rsc:pm8150l-rpmh-regulators failed with error -110 Reported-by: Amit Pundir Fixes: 32bc936d7321 ("arm64: dts: qcom: sm8250: Add cpuidle states") Signed-off-by: Ulf Hansson Tested-by: Amit Pundir Reviewed-by: Sudeep Holla Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221027115745.240516-1-ulf.hansson@linaro.org --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index a5b62cadb129..e276eed1f8e2 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -334,6 +334,7 @@ CLUSTER_SLEEP_0: cluster-sleep-0 { exit-latency-us = <6562>; min-residency-us = <9987>; local-timer-stop; + status = "disabled"; }; }; }; From 57572cacd36e6d4be7722d7770d23f4430219827 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 2 Oct 2022 15:41:33 +0100 Subject: [PATCH 054/963] iio: accel: bma400: Ensure VDDIO is enable defore reading the chip ID. The regulator enables were after the check on the chip variant, which was very unlikely to return a correct value when not powered. Presumably all the device anyone is testing on have a regulator that is already powered up when this code runs for reasons beyond the scope of this driver. Move the read call down a few lines. Fixes: 3cf7ded15e40 ("iio: accel: bma400: basic regulator support") Signed-off-by: Jonathan Cameron Reviewed-by: Dan Robertson Cc: Link: https://lore.kernel.org/r/20221002144133.3771029-1-jic23@kernel.org --- drivers/iio/accel/bma400_core.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index ad8fce3e08cd..490c342ef72a 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -869,18 +869,6 @@ static int bma400_init(struct bma400_data *data) unsigned int val; int ret; - /* Try to read chip_id register. It must return 0x90. */ - ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val); - if (ret) { - dev_err(data->dev, "Failed to read chip id register\n"); - return ret; - } - - if (val != BMA400_ID_REG_VAL) { - dev_err(data->dev, "Chip ID mismatch\n"); - return -ENODEV; - } - data->regulators[BMA400_VDD_REGULATOR].supply = "vdd"; data->regulators[BMA400_VDDIO_REGULATOR].supply = "vddio"; ret = devm_regulator_bulk_get(data->dev, @@ -906,6 +894,18 @@ static int bma400_init(struct bma400_data *data) if (ret) return ret; + /* Try to read chip_id register. It must return 0x90. */ + ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val); + if (ret) { + dev_err(data->dev, "Failed to read chip id register\n"); + return ret; + } + + if (val != BMA400_ID_REG_VAL) { + dev_err(data->dev, "Chip ID mismatch\n"); + return -ENODEV; + } + ret = bma400_get_power_mode(data); if (ret) { dev_err(data->dev, "Failed to get the initial power-mode\n"); From 17f442e7e47579d3881fc4d47354eaef09302e6f Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Fri, 21 Oct 2022 15:58:20 +0200 Subject: [PATCH 055/963] iio: pressure: ms5611: fixed value compensation bug When using multiple instances of this driver the compensation PROM was overwritten by the last initialized sensor. Now each sensor has own PROM storage. Signed-off-by: Mitja Spes Fixes: 9690d81a02dc ("iio: pressure: ms5611: add support for MS5607 temperature and pressure sensor") Link: https://lore.kernel.org/r/20221021135827.1444793-2-mitja@lxnav.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5611.h | 12 +++---- drivers/iio/pressure/ms5611_core.c | 51 ++++++++++++++++-------------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/iio/pressure/ms5611.h b/drivers/iio/pressure/ms5611.h index cbc9349c342a..550b75b7186f 100644 --- a/drivers/iio/pressure/ms5611.h +++ b/drivers/iio/pressure/ms5611.h @@ -25,13 +25,6 @@ enum { MS5607, }; -struct ms5611_chip_info { - u16 prom[MS5611_PROM_WORDS_NB]; - - int (*temp_and_pressure_compensate)(struct ms5611_chip_info *chip_info, - s32 *temp, s32 *pressure); -}; - /* * OverSampling Rate descriptor. * Warning: cmd MUST be kept aligned on a word boundary (see @@ -50,12 +43,15 @@ struct ms5611_state { const struct ms5611_osr *pressure_osr; const struct ms5611_osr *temp_osr; + u16 prom[MS5611_PROM_WORDS_NB]; + int (*reset)(struct ms5611_state *st); int (*read_prom_word)(struct ms5611_state *st, int index, u16 *word); int (*read_adc_temp_and_pressure)(struct ms5611_state *st, s32 *temp, s32 *pressure); - struct ms5611_chip_info *chip_info; + int (*compensate_temp_and_pressure)(struct ms5611_state *st, s32 *temp, + s32 *pressure); struct regulator *vdd; }; diff --git a/drivers/iio/pressure/ms5611_core.c b/drivers/iio/pressure/ms5611_core.c index 717521de66c4..c564a1d6cafe 100644 --- a/drivers/iio/pressure/ms5611_core.c +++ b/drivers/iio/pressure/ms5611_core.c @@ -85,7 +85,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) struct ms5611_state *st = iio_priv(indio_dev); for (i = 0; i < MS5611_PROM_WORDS_NB; i++) { - ret = st->read_prom_word(st, i, &st->chip_info->prom[i]); + ret = st->read_prom_word(st, i, &st->prom[i]); if (ret < 0) { dev_err(&indio_dev->dev, "failed to read prom at %d\n", i); @@ -93,7 +93,7 @@ static int ms5611_read_prom(struct iio_dev *indio_dev) } } - if (!ms5611_prom_is_valid(st->chip_info->prom, MS5611_PROM_WORDS_NB)) { + if (!ms5611_prom_is_valid(st->prom, MS5611_PROM_WORDS_NB)) { dev_err(&indio_dev->dev, "PROM integrity check failed\n"); return -ENODEV; } @@ -114,21 +114,20 @@ static int ms5611_read_temp_and_pressure(struct iio_dev *indio_dev, return ret; } - return st->chip_info->temp_and_pressure_compensate(st->chip_info, - temp, pressure); + return st->compensate_temp_and_pressure(st, temp, pressure); } -static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5611_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = ((s64)chip_info->prom[2] << 16) + ((chip_info->prom[4] * dt) >> 7); - sens = ((s64)chip_info->prom[1] << 15) + ((chip_info->prom[3] * dt) >> 8); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 16) + ((st->prom[4] * dt) >> 7); + sens = ((s64)st->prom[1] << 15) + ((st->prom[3] * dt) >> 8); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2; @@ -154,17 +153,17 @@ static int ms5611_temp_and_pressure_compensate(struct ms5611_chip_info *chip_inf return 0; } -static int ms5607_temp_and_pressure_compensate(struct ms5611_chip_info *chip_info, +static int ms5607_temp_and_pressure_compensate(struct ms5611_state *st, s32 *temp, s32 *pressure) { s32 t = *temp, p = *pressure; s64 off, sens, dt; - dt = t - (chip_info->prom[5] << 8); - off = ((s64)chip_info->prom[2] << 17) + ((chip_info->prom[4] * dt) >> 6); - sens = ((s64)chip_info->prom[1] << 16) + ((chip_info->prom[3] * dt) >> 7); + dt = t - (st->prom[5] << 8); + off = ((s64)st->prom[2] << 17) + ((st->prom[4] * dt) >> 6); + sens = ((s64)st->prom[1] << 16) + ((st->prom[3] * dt) >> 7); - t = 2000 + ((chip_info->prom[6] * dt) >> 23); + t = 2000 + ((st->prom[6] * dt) >> 23); if (t < 2000) { s64 off2, sens2, t2, tmp; @@ -342,15 +341,6 @@ static int ms5611_write_raw(struct iio_dev *indio_dev, static const unsigned long ms5611_scan_masks[] = {0x3, 0}; -static struct ms5611_chip_info chip_info_tbl[] = { - [MS5611] = { - .temp_and_pressure_compensate = ms5611_temp_and_pressure_compensate, - }, - [MS5607] = { - .temp_and_pressure_compensate = ms5607_temp_and_pressure_compensate, - } -}; - static const struct iio_chan_spec ms5611_channels[] = { { .type = IIO_PRESSURE, @@ -433,7 +423,20 @@ int ms5611_probe(struct iio_dev *indio_dev, struct device *dev, struct ms5611_state *st = iio_priv(indio_dev); mutex_init(&st->lock); - st->chip_info = &chip_info_tbl[type]; + + switch (type) { + case MS5611: + st->compensate_temp_and_pressure = + ms5611_temp_and_pressure_compensate; + break; + case MS5607: + st->compensate_temp_and_pressure = + ms5607_temp_and_pressure_compensate; + break; + default: + return -EINVAL; + } + st->temp_osr = &ms5611_avail_temp_osr[ARRAY_SIZE(ms5611_avail_temp_osr) - 1]; st->pressure_osr = From 741cec30cc52058d1c10d415f3b98319887e4f73 Mon Sep 17 00:00:00 2001 From: Mitja Spes Date: Fri, 21 Oct 2022 15:58:21 +0200 Subject: [PATCH 056/963] iio: pressure: ms5611: changed hardcoded SPI speed to value limited Don't hardcode the ms5611 SPI speed, limit it instead. Signed-off-by: Mitja Spes Fixes: c0644160a8b5 ("iio: pressure: add support for MS5611 pressure and temperature sensor") Link: https://lore.kernel.org/r/20221021135827.1444793-3-mitja@lxnav.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/ms5611_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/ms5611_spi.c b/drivers/iio/pressure/ms5611_spi.c index 432e912096f4..a0a7205c9c3a 100644 --- a/drivers/iio/pressure/ms5611_spi.c +++ b/drivers/iio/pressure/ms5611_spi.c @@ -91,7 +91,7 @@ static int ms5611_spi_probe(struct spi_device *spi) spi_set_drvdata(spi, indio_dev); spi->mode = SPI_MODE_0; - spi->max_speed_hz = 20000000; + spi->max_speed_hz = min(spi->max_speed_hz, 20000000U); spi->bits_per_word = 8; ret = spi_setup(spi); if (ret < 0) From 1eb20332a082fa801fb89c347c5e62de916a4001 Mon Sep 17 00:00:00 2001 From: Saravanan Sekar Date: Sat, 29 Oct 2022 11:29:53 +0200 Subject: [PATCH 057/963] iio: adc: mp2629: fix wrong comparison of channel Input voltage channel enum is compared against iio address instead of the channel. Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver") Signed-off-by: Saravanan Sekar Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221029093000.45451-2-sravanhome@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mp2629_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index 30a31f185d08..f7af9af1665d 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -74,7 +74,7 @@ static int mp2629_read_raw(struct iio_dev *indio_dev, if (ret) return ret; - if (chan->address == MP2629_INPUT_VOLT) + if (chan->channel == MP2629_INPUT_VOLT) rval &= GENMASK(6, 0); *val = rval; return IIO_VAL_INT; From 67a9aeef44e42b1ac2becf5e61eae0880f48d9db Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:44 +0200 Subject: [PATCH 058/963] arm64: dts: rockchip: fix node name for hym8563 rtc Fix the node name for hym8563 in all arm64 rockchip devicetrees. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221024165549.74574-2-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3368-r88.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 7f5bba0c6001..0e88e9592c1c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -208,7 +208,7 @@ vdd_cpu: syr827@40 { vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index 38d757c00548..e147d6f8b43e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -192,7 +192,7 @@ vdd_cpu: syr827@40 { vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 5a2661ae0131..18b5050c6cd3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -98,7 +98,7 @@ &fusb0 { }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; interrupt-parent = <&gpio0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 935b8c68a71d..6c168566321b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -297,7 +297,7 @@ &i2c2 { clock-frequency = <400000>; status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; From 2af5bbe32f50d196dd680478a889d12429b3e8cf Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:45 +0200 Subject: [PATCH 059/963] arm64: dts: rockchip: remove clock-frequency from rtc 'clock-frequency' is not part of the DT binding and not supported by the Linux driver. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221024165549.74574-3-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 1 - arch/arm64/boot/dts/rockchip/rk3368-r88.dts | 1 - arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts | 1 - arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi | 1 - arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts | 1 - arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts | 1 - 6 files changed, 6 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 0e88e9592c1c..81d1064fdb21 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -212,7 +212,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts index e147d6f8b43e..5589f3db6b36 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-r88.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-r88.dts @@ -196,7 +196,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; /* rtc_int is not connected */ }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts index 18b5050c6cd3..7ba1c28f70a9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc-plus.dts @@ -104,7 +104,6 @@ hym8563: rtc@51 { interrupt-parent = <&gpio0>; interrupts = ; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi index 6c168566321b..bf9eb0405b62 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399pro-vmarc-som.dtsi @@ -301,7 +301,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index c282f6e79960..26d7fda275ed 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -500,7 +500,6 @@ hym8563: rtc@51 { interrupt-parent = <&gpio0>; interrupts = ; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index fb87a168fe96..539ef8cc7792 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -509,7 +509,6 @@ hym8563: rtc@51 { interrupt-parent = <&gpio0>; interrupts = ; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "rtcic_32kout"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; From 17b57beafccb4569accbfc8c11390744cf59c021 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:46 +0200 Subject: [PATCH 060/963] arm: dts: rockchip: fix node name for hym8563 rtc Fix the node name for hym8563 in all arm rockchip devicetrees. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221024165549.74574-4-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3036-evb.dts | 2 +- arch/arm/boot/dts/rk3288-evb-act8846.dts | 2 +- arch/arm/boot/dts/rk3288-firefly.dtsi | 2 +- arch/arm/boot/dts/rk3288-miqi.dts | 2 +- arch/arm/boot/dts/rk3288-rock2-square.dts | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 9fd4d9db9f8f..89b0927ce162 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -35,7 +35,7 @@ phy0: ethernet-phy@0 { &i2c1 { status = "okay"; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts index be695b8c1f67..8a635c243127 100644 --- a/arch/arm/boot/dts/rk3288-evb-act8846.dts +++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts @@ -54,7 +54,7 @@ vdd_gpu: syr828@41 { vin-supply = <&vcc_sys>; }; - hym8563@51 { + rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 052afe5543e2..9267857beccb 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -233,7 +233,7 @@ vdd_gpu: syr828@41 { vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index 713f55e143c6..e3d5644f2915 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -162,7 +162,7 @@ vdd_gpu: syr828@41 { vin-supply = <&vcc_sys>; }; - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 80e0f07c8e87..07a3a52753d2 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -165,7 +165,7 @@ &hdmi { }; &i2c0 { - hym8563: hym8563@51 { + hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; From 6122f3be70d90a1b2a1188d8910256fc218376a9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 24 Oct 2022 18:55:47 +0200 Subject: [PATCH 061/963] arm: dts: rockchip: remove clock-frequency from rtc 'clock-frequency' is not part of the DT binding and not supported by the Linux driver. Signed-off-by: Sebastian Reichel Link: https://lore.kernel.org/r/20221024165549.74574-5-sebastian.reichel@collabora.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3036-evb.dts | 1 - arch/arm/boot/dts/rk3288-firefly.dtsi | 1 - arch/arm/boot/dts/rk3288-miqi.dts | 1 - arch/arm/boot/dts/rk3288-rock2-square.dts | 1 - arch/arm/boot/dts/rk3288-vmarc-som.dtsi | 1 - 5 files changed, 5 deletions(-) diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts index 89b0927ce162..becdc0b664bf 100644 --- a/arch/arm/boot/dts/rk3036-evb.dts +++ b/arch/arm/boot/dts/rk3036-evb.dts @@ -39,7 +39,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; }; diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi index 9267857beccb..3836c61cfb76 100644 --- a/arch/arm/boot/dts/rk3288-firefly.dtsi +++ b/arch/arm/boot/dts/rk3288-firefly.dtsi @@ -237,7 +237,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio7>; interrupts = ; diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts index e3d5644f2915..db1eb648e0e1 100644 --- a/arch/arm/boot/dts/rk3288-miqi.dts +++ b/arch/arm/boot/dts/rk3288-miqi.dts @@ -166,7 +166,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts index 07a3a52753d2..13cfdaa95cc7 100644 --- a/arch/arm/boot/dts/rk3288-rock2-square.dts +++ b/arch/arm/boot/dts/rk3288-rock2-square.dts @@ -169,7 +169,6 @@ hym8563: rtc@51 { compatible = "haoyu,hym8563"; reg = <0x51>; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "xin32k"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi index 0ae2bd150e37..793951655b73 100644 --- a/arch/arm/boot/dts/rk3288-vmarc-som.dtsi +++ b/arch/arm/boot/dts/rk3288-vmarc-som.dtsi @@ -241,7 +241,6 @@ hym8563: rtc@51 { interrupt-parent = <&gpio5>; interrupts = ; #clock-cells = <0>; - clock-frequency = <32768>; clock-output-names = "hym8563"; pinctrl-names = "default"; pinctrl-0 = <&hym8563_int>; From 942b35de22efeb4f9ded83f1ea7747f3fe5a3bb2 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:37:32 +0200 Subject: [PATCH 062/963] ARM: dts: rockchip: fix adc-keys sub node names Fix adc-keys sub node names on Rockchip boards, so that they match with regex: '^button-' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/7a0013b1-3a55-a344-e9ea-eacb4b49433c@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3066a-mk808.dts | 2 +- arch/arm/boot/dts/rk3288-evb.dtsi | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/rk3066a-mk808.dts b/arch/arm/boot/dts/rk3066a-mk808.dts index cfa318a506eb..2db5ba706208 100644 --- a/arch/arm/boot/dts/rk3066a-mk808.dts +++ b/arch/arm/boot/dts/rk3066a-mk808.dts @@ -32,7 +32,7 @@ adc-keys { keyup-threshold-microvolt = <2500000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = ; press-threshold-microvolt = <0>; diff --git a/arch/arm/boot/dts/rk3288-evb.dtsi b/arch/arm/boot/dts/rk3288-evb.dtsi index 399d6b9c5fd4..382d2839cf47 100644 --- a/arch/arm/boot/dts/rk3288-evb.dtsi +++ b/arch/arm/boot/dts/rk3288-evb.dtsi @@ -28,19 +28,19 @@ button-down { press-threshold-microvolt = <300000>; }; - menu { + button-menu { label = "Menu"; linux,code = ; press-threshold-microvolt = <640000>; }; - esc { + button-esc { label = "Esc"; linux,code = ; press-threshold-microvolt = <1000000>; }; - home { + button-home { label = "Home"; linux,code = ; press-threshold-microvolt = <1300000>; From f2bd2e76d6ea13e12849975adae46145375532a4 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:38:35 +0200 Subject: [PATCH 063/963] arm64: dts: rockchip: fix adc-keys sub node names Fix adc-keys sub node names on Rockchip boards, so that they match with regex: '^button-' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/6a6a3603-5540-cacc-2672-c015af1ec684@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-evb.dts | 10 +++++----- arch/arm64/boot/dts/rockchip/rk3308-evb.dts | 12 ++++++------ arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi | 2 +- arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts | 4 ++-- arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi | 2 +- .../boot/dts/rockchip/rk3399-sapphire-excavator.dts | 4 ++-- arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/px30-evb.dts b/arch/arm64/boot/dts/rockchip/px30-evb.dts index 07008d84434c..c1bbd555f5f5 100644 --- a/arch/arm64/boot/dts/rockchip/px30-evb.dts +++ b/arch/arm64/boot/dts/rockchip/px30-evb.dts @@ -30,31 +30,31 @@ adc-keys { keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - esc-key { + button-esc { label = "esc"; linux,code = ; press-threshold-microvolt = <1310000>; }; - home-key { + button-home { label = "home"; linux,code = ; press-threshold-microvolt = <624000>; }; - menu-key { + button-menu { label = "menu"; linux,code = ; press-threshold-microvolt = <987000>; }; - vol-down-key { + button-down { label = "volume down"; linux,code = ; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { label = "volume up"; linux,code = ; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts index 9fe9b0d11003..184b84fdde07 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-evb.dts @@ -23,7 +23,7 @@ adc-keys0 { poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - func-key { + button-func { linux,code = ; label = "function"; press-threshold-microvolt = <18000>; @@ -37,31 +37,31 @@ adc-keys1 { poll-interval = <100>; keyup-threshold-microvolt = <1800000>; - esc-key { + button-esc { linux,code = ; label = "micmute"; press-threshold-microvolt = <1130000>; }; - home-key { + button-home { linux,code = ; label = "mode"; press-threshold-microvolt = <901000>; }; - menu-key { + button-menu { linux,code = ; label = "play"; press-threshold-microvolt = <624000>; }; - vol-down-key { + button-down { linux,code = ; label = "volume down"; press-threshold-microvolt = <300000>; }; - vol-up-key { + button-up { linux,code = ; label = "volume up"; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts index 43c928ac98f0..1deef53a4c94 100644 --- a/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3318-a95x-z2.dts @@ -25,7 +25,7 @@ adc-keys { keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "recovery"; linux,code = ; press-threshold-microvolt = <17000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 2a332763c35c..9d9297bc5f04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -123,7 +123,7 @@ adc-keys { keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = ; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 452728b82e42..3bf8f959e42c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -39,7 +39,7 @@ adc-keys { keyup-threshold-microvolt = <1800000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = ; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts index 72182c58cc46..65cb21837b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-m4b.dts @@ -19,7 +19,7 @@ adc-keys { keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = ; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 9e2e246e0bab..dba4d03bfc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -52,13 +52,13 @@ button-down { press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = ; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = ; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index 2f4b1b2e3ac7..bbf1e3f24585 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -41,7 +41,7 @@ adc-keys { keyup-threshold-microvolt = <1500000>; poll-interval = <100>; - recovery { + button-recovery { label = "Recovery"; linux,code = ; press-threshold-microvolt = <18000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts index 13927e7d0724..dbec2b7173a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire-excavator.dts @@ -33,13 +33,13 @@ button-down { press-threshold-microvolt = <300000>; }; - back { + button-back { label = "Back"; linux,code = ; press-threshold-microvolt = <985000>; }; - menu { + button-menu { label = "Menu"; linux,code = ; press-threshold-microvolt = <1314000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 0d45868132b9..8d61f824c12d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -23,7 +23,7 @@ adc-keys { io-channel-names = "buttons"; keyup-threshold-microvolt = <1750000>; - recovery { + button-recovery { label = "recovery"; linux,code = ; press-threshold-microvolt = <0>; From dd847fe34cdf1e89afed1af24986359f13082bfb Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:58:22 +0200 Subject: [PATCH 064/963] ARM: dts: rockchip: fix ir-receiver node names Fix ir-receiver node names on Rockchip boards, so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/ea5af279-f44c-afea-023d-bb37f5a0d58d@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3188-radxarock.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index e7cf18823558..118deacd38c4 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -71,7 +71,7 @@ spdif_out: spdif-out { #sound-dai-cells = <0>; }; - ir_recv: gpio-ir-receiver { + ir_recv: ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; From de0d04b9780a23eb928aedfb6f981285f78d58e5 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 10:59:10 +0200 Subject: [PATCH 065/963] arm64: dts: rockchip: fix ir-receiver node names Fix ir-receiver node names on Rockchip boards, so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/e9764253-8ce8-150b-4820-41f03f845469@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index ea6820902ede..7ea48167747c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -19,7 +19,7 @@ chosen { stdout-path = "serial2:1500000n8"; }; - ir_rx { + ir-receiver { compatible = "gpio-ir-receiver"; gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; From 11871e20bcb23c00966e785a124fb72bc8340af4 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Oct 2022 01:31:37 +0200 Subject: [PATCH 066/963] ARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name The lcdc1-rgb24 node name is out of line with the rest of the rk3188 lcdc1 node, so fix it. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/7b9c0a6f-626b-07e8-ae74-7e0f08b8d241@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3188.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..b8f34bef0efa 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -379,7 +379,7 @@ lcdc1_vsync: lcdc1-vsync { rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; }; - lcdc1_rgb24: ldcd1-rgb24 { + lcdc1_rgb24: lcdc1-rgb24 { rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, <2 RK_PA1 1 &pcfg_pull_none>, <2 RK_PA2 1 &pcfg_pull_none>, From 0873509ea64739a9be02e8ee7b4ff573e503ab8e Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 26 Oct 2022 19:43:03 +0200 Subject: [PATCH 067/963] ARM: dts: lan966x: Enable sgpio on pcb8291 Enable sgpio node on pcb8291 as this is needed to be able to control the LEDs on this board. Otherwise the LEDs support on the board will not be available. On the other board pcb8309 the sgpio is already enabled because it needed to access the SFP ports. Fixes: 0b7baa1a307f ("ARM: dts: lan966x: add led configuration") Signed-off-by: Horatiu Vultur Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221026174303.702919-1-horatiu.vultur@microchip.com --- arch/arm/boot/dts/lan966x-pcb8291.dts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm/boot/dts/lan966x-pcb8291.dts b/arch/arm/boot/dts/lan966x-pcb8291.dts index f4f054cdf2a8..3a3d76af8612 100644 --- a/arch/arm/boot/dts/lan966x-pcb8291.dts +++ b/arch/arm/boot/dts/lan966x-pcb8291.dts @@ -69,6 +69,12 @@ can0_b_pins: can0-b-pins { pins = "GPIO_35", "GPIO_36"; function = "can0_b"; }; + + sgpio_a_pins: sgpio-a-pins { + /* SCK, D0, D1, LD */ + pins = "GPIO_32", "GPIO_33", "GPIO_34", "GPIO_35"; + function = "sgpio_a"; + }; }; &can0 { @@ -118,6 +124,20 @@ &serdes { status = "okay"; }; +&sgpio { + pinctrl-0 = <&sgpio_a_pins>; + pinctrl-names = "default"; + microchip,sgpio-port-ranges = <0 3>, <8 11>; + status = "okay"; + + gpio@0 { + ngpios = <64>; + }; + gpio@1 { + ngpios = <64>; + }; +}; + &switch { status = "okay"; }; From e59bf547a7dd366f93bfebb7487959580ca6c0ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 27 Oct 2022 11:57:58 +0200 Subject: [PATCH 068/963] ASoC: tas2770: Fix set_tdm_slot in case of single slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a special branch in the set_tdm_slot op for the case of nslots being 1, but: (1) That branch can never work (there's a check for tx_mask being non-zero, later there's another check for it *being* zero; one or the other always throws -EINVAL). (2) The intention of the branch seems to be what the general other branch reduces to in case of nslots being 1. For those reasons remove the 'nslots being 1' special case. Fixes: 1a476abc723e ("tas2770: add tas2770 smart PA kernel driver") Suggested-by: Jos Dehaes Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221027095800.16094-1-povik+lin@cutebit.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2770.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index b6765235a4b3..8557759acb1f 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -395,21 +395,13 @@ static int tas2770_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) From faac764ea1ea6898d93e46c403271fb105c0906e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 27 Oct 2022 11:57:59 +0200 Subject: [PATCH 069/963] ASoC: tas2764: Fix set_tdm_slot in case of single slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a special branch in the set_tdm_slot op for the case of nslots being 1, but: (1) That branch can never work (there's a check for tx_mask being non-zero, later there's another check for it *being* zero; one or the other always throws -EINVAL). (2) The intention of the branch seems to be what the general other branch reduces to in case of nslots being 1. For those reasons remove the 'nslots being 1' special case. Fixes: 827ed8a0fa50 ("ASoC: tas2764: Add the driver for the TAS2764") Suggested-by: Jos Dehaes Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221027095800.16094-2-povik+lin@cutebit.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2764.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index 51b87a936179..2e0ed3e68fa5 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -438,20 +438,13 @@ static int tas2764_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) From 6f934afa6a980bb8d3ce73836b1a9922685e50d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 27 Oct 2022 11:58:00 +0200 Subject: [PATCH 070/963] ASoC: tas2780: Fix set_tdm_slot in case of single slot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a special branch in the set_tdm_slot op for the case of nslots being 1, but: (1) That branch can never work (there's a check for tx_mask being non-zero, later there's another check for it *being* zero; one or the other always throws -EINVAL). (2) The intention of the branch seems to be what the general other branch reduces to in case of nslots being 1. For those reasons remove the 'nslots being 1' special case. Fixes: eae9f9ce181b ("ASoC: add tas2780 driver") Suggested-by: Jos Dehaes Signed-off-by: Martin Povišer Link: https://lore.kernel.org/r/20221027095800.16094-3-povik+lin@cutebit.org Signed-off-by: Mark Brown --- sound/soc/codecs/tas2780.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/sound/soc/codecs/tas2780.c b/sound/soc/codecs/tas2780.c index a6db6f0e5431..afdf0c863aa1 100644 --- a/sound/soc/codecs/tas2780.c +++ b/sound/soc/codecs/tas2780.c @@ -380,20 +380,13 @@ static int tas2780_set_dai_tdm_slot(struct snd_soc_dai *dai, if (tx_mask == 0 || rx_mask != 0) return -EINVAL; - if (slots == 1) { - if (tx_mask != 1) - return -EINVAL; - left_slot = 0; - right_slot = 0; + left_slot = __ffs(tx_mask); + tx_mask &= ~(1 << left_slot); + if (tx_mask == 0) { + right_slot = left_slot; } else { - left_slot = __ffs(tx_mask); - tx_mask &= ~(1 << left_slot); - if (tx_mask == 0) { - right_slot = left_slot; - } else { - right_slot = __ffs(tx_mask); - tx_mask &= ~(1 << right_slot); - } + right_slot = __ffs(tx_mask); + tx_mask &= ~(1 << right_slot); } if (tx_mask != 0 || left_slot >= slots || right_slot >= slots) From 7ee47dcfff1835ff75a794d1075b6b5f5462cfed Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 31 Oct 2022 18:52:56 +0100 Subject: [PATCH 071/963] fs: use acquire ordering in __fget_light() We must prevent the CPU from reordering the files->count read with the FD table access like this, on architectures where read-read reordering is possible: files_lookup_fd_raw() close_fd() put_files_struct() atomic_read(&files->count) I would like to mark this for stable, but the stable rules explicitly say "no theoretical races", and given that the FD table pointer and files->count are explicitly stored in the same cacheline, this sort of reordering seems quite unlikely in practice... Signed-off-by: Jann Horn Signed-off-by: Al Viro --- fs/file.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/file.c b/fs/file.c index 5f9c802a5d8d..c942c89ca4cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1003,7 +1003,16 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask) struct files_struct *files = current->files; struct file *file; - if (atomic_read(&files->count) == 1) { + /* + * If another thread is concurrently calling close_fd() followed + * by put_files_struct(), we must not observe the old table + * entry combined with the new refcount - otherwise we could + * return a file that is concurrently being freed. + * + * atomic_read_acquire() pairs with atomic_dec_and_test() in + * put_files_struct(). + */ + if (atomic_read_acquire(&files->count) == 1) { file = files_lookup_fd_raw(files, fd); if (!file || unlikely(file->f_mode & mask)) return 0; From 9a1d248bb4beaf1b43d17ba12481ee0629fa29b9 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 31 Oct 2022 15:58:36 -0400 Subject: [PATCH 072/963] ASoC: Intel: soc-acpi: add ES83x6 support to IceLake Missing entry to find a machine driver for ES83x6-based platforms. Link: https://github.com/thesofproject/linux/issues/3873 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20221031195836.250193-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/common/soc-acpi-intel-icl-match.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/soc/intel/common/soc-acpi-intel-icl-match.c b/sound/soc/intel/common/soc-acpi-intel-icl-match.c index b032bc07de8b..d0062f2cd256 100644 --- a/sound/soc/intel/common/soc-acpi-intel-icl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-icl-match.c @@ -10,6 +10,11 @@ #include #include "../skylake/skl.h" +static const struct snd_soc_acpi_codecs essx_83x6 = { + .num_codecs = 3, + .codecs = { "ESSX8316", "ESSX8326", "ESSX8336"}, +}; + static struct skl_machine_pdata icl_pdata = { .use_tplg_pcm = true, }; @@ -27,6 +32,14 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_icl_machines[] = { .drv_name = "sof_rt5682", .sof_tplg_filename = "sof-icl-rt5682.tplg", }, + { + .comp_ids = &essx_83x6, + .drv_name = "sof-essx8336", + .sof_tplg_filename = "sof-icl-es8336", /* the tplg suffix is added at run time */ + .tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER | + SND_SOC_ACPI_TPLG_INTEL_SSP_MSB | + SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER, + }, {}, }; EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_icl_machines); From f9be5cb6c1f0191f8bcf4413b7e17e58e8dfaaa1 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sat, 29 Oct 2022 00:40:52 +0200 Subject: [PATCH 073/963] power: supply: ip5xxx: Fix integer overflow in current_now calculation When current is larger than ~2A, the multiplication in current_now property overflows and the kernel reports invalid negative current value. Change the numerator and denominator while preserving their ratio to allow up to +-6A before the overflow. Fixes: 75853406fa27 ("power: supply: Add a driver for Injoinic power bank ICs") Signed-off-by: Ondrej Jirman Reviewed-by: Samuel Holland [use 149197/200 instead of 261095/350 as suggested by Samuel] Signed-off-by: Sebastian Reichel --- drivers/power/supply/ip5xxx_power.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/supply/ip5xxx_power.c b/drivers/power/supply/ip5xxx_power.c index 218e8e689a3f..00221e9c0bfc 100644 --- a/drivers/power/supply/ip5xxx_power.c +++ b/drivers/power/supply/ip5xxx_power.c @@ -352,7 +352,7 @@ static int ip5xxx_battery_get_property(struct power_supply *psy, ret = ip5xxx_battery_read_adc(ip5xxx, IP5XXX_BATIADC_DAT0, IP5XXX_BATIADC_DAT1, &raw); - val->intval = DIV_ROUND_CLOSEST(raw * 745985, 1000); + val->intval = DIV_ROUND_CLOSEST(raw * 149197, 200); return 0; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT: From 767e684367e4759d9855b184045b7a9d6b19acd2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 30 Oct 2022 21:55:54 +0100 Subject: [PATCH 074/963] power: supply: ab8500: Defer thermal zone probe The call thermal_zone_get_zone_by_name() used to return the thermal zone right away, but recent refactorings in the thermal core has changed this so the thermal zone used by the battery is probed later, and the call returns -ENODEV. This was always quite fragile. If we get -ENODEV, then return a -EPROBE_DEFER and try again later. Cc: phone-devel@vger.kernel.org Fixes: 2b0e7ac0841b ("power: supply: ab8500: Integrate thermal zone") Signed-off-by: Linus Walleij Signed-off-by: Sebastian Reichel --- drivers/power/supply/ab8500_btemp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/power/supply/ab8500_btemp.c b/drivers/power/supply/ab8500_btemp.c index 863fabe05bdc..307ee6f71042 100644 --- a/drivers/power/supply/ab8500_btemp.c +++ b/drivers/power/supply/ab8500_btemp.c @@ -725,7 +725,14 @@ static int ab8500_btemp_probe(struct platform_device *pdev) /* Get thermal zone and ADC */ di->tz = thermal_zone_get_zone_by_name("battery-thermal"); if (IS_ERR(di->tz)) { - return dev_err_probe(dev, PTR_ERR(di->tz), + ret = PTR_ERR(di->tz); + /* + * This usually just means we are probing before the thermal + * zone, so just defer. + */ + if (ret == -ENODEV) + ret = -EPROBE_DEFER; + return dev_err_probe(dev, ret, "failed to get battery thermal zone\n"); } di->bat_ctrl = devm_iio_channel_get(dev, "bat_ctrl"); From ca1547ab15f48dc81624183ae17a2fd1bad06dfc Mon Sep 17 00:00:00 2001 From: Saravanan Sekar Date: Sat, 29 Oct 2022 11:29:55 +0200 Subject: [PATCH 075/963] iio: adc: mp2629: fix potential array out of bound access Add sentinel at end of maps to avoid potential array out of bound access in iio core. Fixes: 7abd9fb64682 ("iio: adc: mp2629: Add support for mp2629 ADC driver") Signed-off-by: Saravanan Sekar Link: https://lore.kernel.org/r/20221029093000.45451-4-sravanhome@gmail.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/mp2629_adc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/mp2629_adc.c b/drivers/iio/adc/mp2629_adc.c index f7af9af1665d..88e947f300cf 100644 --- a/drivers/iio/adc/mp2629_adc.c +++ b/drivers/iio/adc/mp2629_adc.c @@ -57,7 +57,8 @@ static struct iio_map mp2629_adc_maps[] = { MP2629_MAP(SYSTEM_VOLT, "system-volt"), MP2629_MAP(INPUT_VOLT, "input-volt"), MP2629_MAP(BATT_CURRENT, "batt-current"), - MP2629_MAP(INPUT_CURRENT, "input-current") + MP2629_MAP(INPUT_CURRENT, "input-current"), + { } }; static int mp2629_read_raw(struct iio_dev *indio_dev, From 65f20301607d07ee279b0804d11a05a62a6c1a1c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 24 Oct 2022 16:45:11 +0800 Subject: [PATCH 076/963] iio: adc: at91_adc: fix possible memory leak in at91_adc_allocate_trigger() If iio_trigger_register() returns error, it should call iio_trigger_free() to give up the reference that hold in iio_trigger_alloc(), so that it can call iio_trig_release() to free memory when the refcount hit to 0. Fixes: 0e589d5fb317 ("ARM: AT91: IIO: Add AT91 ADC driver.") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221024084511.815096-1-yangyingliang@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91_adc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index 532daaa6f943..366e252ebeb0 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -634,8 +634,10 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *idev, trig->ops = &at91_adc_trigger_ops; ret = iio_trigger_register(trig); - if (ret) + if (ret) { + iio_trigger_free(trig); return NULL; + } return trig; } From dd4753f88f242f46d0d8726f5936f64e754a47f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:39:52 +0300 Subject: [PATCH 077/963] iio: imu: bno055: uninitialized variable bug in bno055_trigger_handler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug is basically harmless, although it will trigger a runtime warning if you use KMSan. On the first iteration through the loop, the "best_delta" variable is uninitialized so re-order the condition to prevent reading uninitialized memory. Fixes: 4aefe1c2bd0c ("iio: imu: add Bosch Sensortec BNO055 core driver") Signed-off-by: Dan Carpenter Acked-by: Nuno Sá Link: https://lore.kernel.org/r/Y0kuaO9PQkSQja+A@kili Signed-off-by: Jonathan Cameron --- drivers/iio/imu/bno055/bno055.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/bno055/bno055.c b/drivers/iio/imu/bno055/bno055.c index 307557a609e3..52744dd98e65 100644 --- a/drivers/iio/imu/bno055/bno055.c +++ b/drivers/iio/imu/bno055/bno055.c @@ -632,7 +632,7 @@ static int bno055_set_regmask(struct bno055_priv *priv, int val, int val2, return -EINVAL; } delta = abs(tbl_val - req_val); - if (delta < best_delta || first) { + if (first || delta < best_delta) { best_delta = delta; hwval = i; first = false; From 7c919b619bcc68158921b1bd968f0e704549bbb6 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 14 Oct 2022 10:15:19 +0300 Subject: [PATCH 078/963] tools: iio: iio_generic_buffer: Fix read size When noevents is true and small buffer is used the allocated memory for holding the data may be smaller than the hard-coded 64 bytes. This can cause the iio_generic_buffer to crash. Following was recorded on beagle bone black with v6.0 kernel and the digit fix patch: https://lore.kernel.org/all/Y0f+tKCz+ZAIoroQ@dc75zzyyyyyyyyyyyyycy-3.rev.dnainternet.fi/ using valgrind; ==339== Using Valgrind-3.18.1 and LibVEX; rerun with -h for copyright info ==339== Command: /iio_generic_buffer -n kx022-accel -T0 -e -l 10 -a -w 2000000 ==339== Parent PID: 307 ==339== ==339== Syscall param read(buf) points to unaddressable byte(s) ==339== at 0x496BFA4: read (read.c:26) ==339== by 0x11699: main (iio_generic_buffer.c:724) ==339== Address 0x4ab3518 is 0 bytes after a block of size 160 alloc'd ==339== at 0x4864B70: malloc (vg_replace_malloc.c:381) ==339== by 0x115BB: main (iio_generic_buffer.c:677) Fix this by always using the same size for reading as was used for data storage allocation. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/Y0kMh0t5qUXJw3nQ@dc75zzyyyyyyyyyyyyycy-3.rev.dnainternet.fi Signed-off-by: Jonathan Cameron --- tools/iio/iio_generic_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 2491c54a5e4f..f8deae4e26a1 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -715,12 +715,12 @@ int main(int argc, char **argv) continue; } - toread = buf_len; } else { usleep(timedelay); - toread = 64; } + toread = buf_len; + read_size = read(buf_fd, data, toread * scan_size); if (read_size < 0) { if (errno == EAGAIN) { From cef8cdc0d0e7c701fe4dcfba4ed3fd25d28a6020 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 26 Oct 2022 15:41:04 +0300 Subject: [PATCH 079/963] ARM: at91: pm: avoid soft resetting AC DLL Do not soft reset AC DLL as controller is buggy and this operation my introduce glitches in the controller leading to undefined behavior. Fixes: f0bbf17958e8 ("ARM: at91: pm: add self-refresh support for sama7g5") Depends-on: a02875c4cbd6 ("ARM: at91: pm: fix self-refresh for sama7g5") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221026124114.985876-2-claudiu.beznea@microchip.com --- arch/arm/mach-at91/pm_suspend.S | 7 ++++++- include/soc/at91/sama7-ddr.h | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index ffed4d949042..e4904faf1753 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -169,10 +169,15 @@ sr_ena_2: cmp tmp1, #UDDRC_STAT_SELFREF_TYPE_SW bne sr_ena_2 - /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ + /* Disable DX DLLs for non-backup modes. */ cmp r7, #AT91_PM_BACKUP beq sr_ena_3 + /* Do not soft reset the AC DLL. */ + ldr tmp1, [r3, DDR3PHY_ACDLLCR] + bic tmp1, tmp1, DDR3PHY_ACDLLCR_DLLSRST + str tmp1, [r3, DDR3PHY_ACDLLCR] + /* Disable DX DLLs. */ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h index 6ce3bd22f6c6..5ad7ac2e3a7c 100644 --- a/include/soc/at91/sama7-ddr.h +++ b/include/soc/at91/sama7-ddr.h @@ -26,7 +26,10 @@ #define DDR3PHY_PGSR (0x0C) /* DDR3PHY PHY General Status Register */ #define DDR3PHY_PGSR_IDONE (1 << 0) /* Initialization Done */ -#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ +#define DDR3PHY_ACDLLCR (0x14) /* DDR3PHY AC DLL Control Register */ +#define DDR3PHY_ACDLLCR_DLLSRST (1 << 30) /* DLL Soft Reset */ + +#define DDR3PHY_ACIOCR (0x24) /* DDR3PHY AC I/O Configuration Register */ #define DDR3PHY_ACIOCR_CSPDD_CS0 (1 << 18) /* CS#[0] Power Down Driver */ #define DDR3PHY_ACIOCR_CKPDD_CK0 (1 << 8) /* CK[0] Power Down Driver */ #define DDR3PHY_ACIORC_ACPDD (1 << 3) /* AC Power Down Driver */ From 5d73263f9e7c54ccb20814dc50809b9deb9e2bc7 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 31 Oct 2022 15:56:39 -0400 Subject: [PATCH 080/963] ASoC: hda: intel-dsp-config: add ES83x6 quirk for IceLake Yet another hardware variant we need to handle. Link: https://github.com/thesofproject/linux/issues/3873 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Takashi Iwai Link: https://lore.kernel.org/r/20221031195639.250062-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/hda/intel-dsp-config.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index b9eb3208f288..ae31bb127594 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -320,6 +320,11 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF, + .device = 0x34c8, + .codec_hid = &essx_83x6, + }, { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x34c8, From 003b786b678919e072c2b12ffa73901ef840963e Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Tue, 1 Nov 2022 13:49:13 +0200 Subject: [PATCH 081/963] ASoC: SOF: ipc3-topology: use old pipeline teardown flow with SOF2.1 and older MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally in commit b2ebcf42a48f ("ASoC: SOF: free widgets in sof_tear_down_pipelines() for static pipelines"), freeing of pipeline components at suspend was only done with recent FW as there were known limitations in older firmware versions. Tests show that if static pipelines are used, i.e. all pipelines are setup whenever firmware is powered up, the reverse action of freeing all components at power down, leads to firmware failures with also SOF2.0 and SOF2.1 based firmware. The problems can be specific to certain topologies with e.g. components not prepared to be freed at suspend (as this did not happen with older SOF kernels). To avoid hitting these problems when kernel is upgraded and used with an older firmware, bump the firmware requirement to SOF2.2 or newer. If an older firmware is used, and pipeline is a static one, do not free the components at suspend. This ensures the suspend flow remains backwards compatible with older firmware versions. This limitation does not apply if the product configuration is updated to dynamic pipelines. The limitation is not linked to firmware ABI, as the interface to free pipeline components has been available already before ABI3.19. The problem is in the implementation, so firmware version should be used to decide whether it is safe to use the newer flow or not. This patch adds a new SOF_FW_VER() macro to compare SOF firmware release versions. Link: https://github.com/thesofproject/sof/issues/6475 Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Péter Ujfalusi Link: https://lore.kernel.org/r/20221101114913.1292671-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/info.h | 4 ++++ sound/soc/sof/ipc3-topology.c | 15 ++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h index 65e86e4e9fd8..75193850ead0 100644 --- a/include/sound/sof/info.h +++ b/include/sound/sof/info.h @@ -36,6 +36,10 @@ enum sof_ipc_ext_data { SOF_IPC_EXT_USER_ABI_INFO = 4, }; +/* Build u32 number in format MMmmmppp */ +#define SOF_FW_VER(MAJOR, MINOR, PATCH) ((uint32_t)( \ + ((MAJOR) << 24) | ((MINOR) << 12) | (PATCH))) + /* FW version - SOF_IPC_GLB_VERSION */ struct sof_ipc_fw_version { struct sof_ipc_hdr hdr; diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index c148715aa0f9..0720e1eae084 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -2275,6 +2275,7 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif struct sof_ipc_fw_version *v = &sdev->fw_ready.version; struct snd_sof_widget *swidget; struct snd_sof_route *sroute; + bool dyn_widgets = false; int ret; /* @@ -2284,12 +2285,14 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif * topology loading the sound card unavailable to open PCMs. */ list_for_each_entry(swidget, &sdev->widget_list, list) { - if (swidget->dynamic_pipeline_widget) + if (swidget->dynamic_pipeline_widget) { + dyn_widgets = true; continue; + } - /* Do not free widgets for static pipelines with FW ABI older than 3.19 */ + /* Do not free widgets for static pipelines with FW older than SOF2.2 */ if (!verify && !swidget->dynamic_pipeline_widget && - v->abi_version < SOF_ABI_VER(3, 19, 0)) { + SOF_FW_VER(v->major, v->minor, v->micro) < SOF_FW_VER(2, 2, 0)) { swidget->use_count = 0; swidget->complete = 0; continue; @@ -2303,9 +2306,11 @@ static int sof_ipc3_tear_down_all_pipelines(struct snd_sof_dev *sdev, bool verif /* * Tear down all pipelines associated with PCMs that did not get suspended * and unset the prepare flag so that they can be set up again during resume. - * Skip this step for older firmware. + * Skip this step for older firmware unless topology has any + * dynamic pipeline (in which case the step is mandatory). */ - if (!verify && v->abi_version >= SOF_ABI_VER(3, 19, 0)) { + if (!verify && (dyn_widgets || SOF_FW_VER(v->major, v->minor, v->micro) >= + SOF_FW_VER(2, 2, 0))) { ret = sof_tear_down_left_over_pipelines(sdev); if (ret < 0) { dev_err(sdev->dev, "failed to tear down paused pipelines\n"); From 038ee49fef18710bedd38b531d173ccd746b2d8d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 27 Sep 2022 13:52:34 +0200 Subject: [PATCH 082/963] serial: 8250: 8250_omap: Avoid RS485 RTS glitch on ->set_termios() RS485-enabled UART ports on TI Sitara SoCs with active-low polarity exhibit a Transmit Enable glitch on ->set_termios(): omap8250_restore_regs(), which is called from omap_8250_set_termios(), sets the TCRTLR bit in the MCR register and clears all other bits, including RTS. If RTS uses active-low polarity, it is now asserted for no reason. The TCRTLR bit is subsequently cleared by writing up->mcr to the MCR register. That variable is always zero, so the RTS bit is still cleared (incorrectly so if RTS is active-high). (up->mcr is not, as one might think, a cache of the MCR register's current value. Rather, it only caches a single bit of that register, the AFE bit. And it only does so if the UART supports the AFE bit, which OMAP does not. For details see serial8250_do_set_termios() and serial8250_do_set_mctrl().) Finally at the end of omap8250_restore_regs(), the MCR register is restored (and RTS deasserted) by a call to up->port.ops->set_mctrl() (which equals serial8250_set_mctrl()) and serial8250_em485_stop_tx(). So there's an RTS glitch between setting TCRTLR and calling serial8250_em485_stop_tx(). Avoid by using a read-modify-write when setting TCRTLR. While at it, drop a redundant initialization of up->mcr. As explained above, the variable isn't used by the driver and it is already initialized to zero because it is part of the static struct serial8250_ports[] declared in 8250_core.c. (Static structs are initialized to zero per section 6.7.8 nr. 10 of the C99 standard.) Cc: Jan Kiszka Cc: Su Bao Cheng Tested-by: Matthias Schiffer Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/6554b0241a2c7fd50f32576fdbafed96709e11e8.1664278942.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 41b8c6b27136..68f5a167377f 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -292,6 +292,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) { struct omap8250_priv *priv = up->port.private_data; struct uart_8250_dma *dma = up->dma; + u8 mcr = serial8250_in_MCR(up); if (dma && dma->tx_running) { /* @@ -308,7 +309,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_EFR, UART_EFR_ECB); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial8250_out_MCR(up, UART_MCR_TCRTLR); + serial8250_out_MCR(up, mcr | UART_MCR_TCRTLR); serial_out(up, UART_FCR, up->fcr); omap8250_update_scr(up, priv); @@ -324,7 +325,8 @@ static void omap8250_restore_regs(struct uart_8250_port *up) serial_out(up, UART_LCR, 0); /* drop TCR + TLR access, we setup XON/XOFF later */ - serial8250_out_MCR(up, up->mcr); + serial8250_out_MCR(up, mcr); + serial_out(up, UART_IER, up->ier); serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); @@ -669,7 +671,6 @@ static int omap_8250_startup(struct uart_port *port) pm_runtime_get_sync(port->dev); - up->mcr = 0; serial_out(up, UART_FCR, UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); serial_out(up, UART_LCR, UART_LCR_WLEN8); From 93810191f5d23652c0b8a1a9b3a4a89d6fd5063e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 24 Oct 2022 09:36:13 +0300 Subject: [PATCH 083/963] serial: 8250: omap: Fix missing PM runtime calls for omap8250_set_mctrl() There are cases where omap8250_set_mctrl() may get called after the UART has already autoidled causing an asynchronous external abort. This can happen on ttyport_open(): mem_serial_in from omap8250_set_mctrl+0x38/0xa0 omap8250_set_mctrl from uart_update_mctrl+0x4c/0x58 uart_update_mctrl from uart_dtr_rts+0x60/0xa8 uart_dtr_rts from tty_port_block_til_ready+0xd0/0x2a8 tty_port_block_til_ready from uart_open+0x14/0x1c uart_open from ttyport_open+0x64/0x148 And on ttyport_close(): omap8250_set_mctrl from uart_update_mctrl+0x3c/0x48 uart_update_mctrl from uart_dtr_rts+0x54/0x9c uart_dtr_rts from tty_port_shutdown+0x78/0x9c tty_port_shutdown from tty_port_close+0x3c/0x74 tty_port_close from ttyport_close+0x40/0x58 It can also happen on disassociate_ctty() calling uart_shutdown() that ends up calling omap8250_set_mctrl(). Let's fix the issue by adding missing PM runtime calls to omap8250_set_mctrl(). To do this, we need to add __omap8250_set_mctrl() that can be called from both omap8250_set_mctrl(), and from runtime PM resume path when restoring the registers. Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Reported-by: Merlijn Wajer Reported-by: Romain Naour Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Signed-off-by: Tony Lindgren Depends-on: dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") Link: https://lore.kernel.org/r/20221024063613.25943-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 68f5a167377f..7bfa87094125 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -157,7 +157,11 @@ static u32 uart_read(struct uart_8250_port *up, u32 reg) return readl(up->port.membase + (reg << up->port.regshift)); } -static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +/* + * Called on runtime PM resume path from omap8250_restore_regs(), and + * omap8250_set_mctrl(). + */ +static void __omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) { struct uart_8250_port *up = up_to_u8250p(port); struct omap8250_priv *priv = up->port.private_data; @@ -181,6 +185,20 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) } } +static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + int err; + + err = pm_runtime_resume_and_get(port->dev); + if (err) + return; + + __omap8250_set_mctrl(port, mctrl); + + pm_runtime_mark_last_busy(port->dev); + pm_runtime_put_autosuspend(port->dev); +} + /* * Work Around for Errata i202 (2430, 3430, 3630, 4430 and 4460) * The access to uart register after MDR1 Access @@ -343,7 +361,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) omap8250_update_mdr1(up, priv); - up->port.ops->set_mctrl(&up->port, up->port.mctrl); + __omap8250_set_mctrl(&up->port, up->port.mctrl); if (up->port.rs485.flags & SER_RS485_ENABLED) serial8250_em485_stop_tx(up); From e828e56684d61b17317e0cfdef83791fa61cb76b Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Thu, 13 Oct 2022 13:23:39 +0200 Subject: [PATCH 084/963] serial: 8250_omap: remove wait loop from Errata i202 workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were occasionally seeing the "Errata i202: timedout" on an AM335x board when repeatedly opening and closing a UART connected to an active sender. As new input may arrive at any time, it is possible to miss the "RX FIFO empty" condition, forcing the loop to wait until it times out. Nothing in the i202 Advisory states that such a wait is even necessary; other FIFO clear functions like serial8250_clear_fifos() do not wait either. For this reason, it seems safe to remove the wait, fixing the mentioned issue. Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Reviewed-by: Ilpo Järvinen Signed-off-by: Matthias Schiffer Link: https://lore.kernel.org/r/20221013112339.2540767-1-matthias.schiffer@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 7bfa87094125..766c57d50b4c 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -211,27 +211,10 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl) static void omap_8250_mdr1_errataset(struct uart_8250_port *up, struct omap8250_priv *priv) { - u8 timeout = 255; - serial_out(up, UART_OMAP_MDR1, priv->mdr1); udelay(2); serial_out(up, UART_FCR, up->fcr | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); - /* - * Wait for FIFO to empty: when empty, RX_FIFO_E bit is 0 and - * TX_FIFO_E bit is 1. - */ - while (UART_LSR_THRE != (serial_in(up, UART_LSR) & - (UART_LSR_THRE | UART_LSR_DR))) { - timeout--; - if (!timeout) { - /* Should *never* happen. we warn and carry on */ - dev_crit(up->port.dev, "Errata i202: timedout %x\n", - serial_in(up, UART_LSR)); - break; - } - udelay(1); - } } static void omap_8250_get_divisor(struct uart_port *port, unsigned int baud, From e3f0c638f428fd66b5871154b62706772045f91a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Oct 2022 13:58:13 +0300 Subject: [PATCH 085/963] serial: 8250: omap: Fix unpaired pm_runtime_put_sync() in omap8250_remove() On remove, we get an error for "Runtime PM usage count underflow!". I guess this driver is mostly built-in, and this issue has gone unnoticed for a while. Somehow I did not catch this issue with my earlier fix done with commit 4e0f5cc65098 ("serial: 8250_omap: Fix probe and remove for PM runtime"). Fixes: 4e0f5cc65098 ("serial: 8250_omap: Fix probe and remove for PM runtime") Signed-off-by: Tony Lindgren Depends-on: dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") Link: https://lore.kernel.org/r/20221028105813.54290-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 766c57d50b4c..c6ec6eb7ee21 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1460,6 +1460,11 @@ static int omap8250_probe(struct platform_device *pdev) static int omap8250_remove(struct platform_device *pdev) { struct omap8250_priv *priv = platform_get_drvdata(pdev); + int err; + + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + return err; pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); From d0b68629bd2fb61e0171a62f2e8da3db322f5cf6 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Oct 2022 14:00:44 +0300 Subject: [PATCH 086/963] serial: 8250: omap: Flush PM QOS work on remove Rebinding 8250_omap in a loop will at some point produce a warning for kernel/power/qos.c:296 cpu_latency_qos_update_request() with error "cpu_latency_qos_update_request called for unknown object". Let's flush the possibly pending PM QOS work scheduled from omap8250_runtime_suspend() before we disable runtime PM. Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20221028110044.54719-1-tony@atomide.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index c6ec6eb7ee21..3f33014022f0 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1468,6 +1468,7 @@ static int omap8250_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + flush_work(&priv->qos_work); pm_runtime_disable(&pdev->dev); serial8250_unregister_port(priv->line); cpu_latency_qos_remove_request(&priv->pm_qos_request); From 76bad3f88750f8cc465c489e6846249e0bc3d8f5 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Mon, 24 Oct 2022 16:58:44 +0800 Subject: [PATCH 087/963] tty: serial: fsl_lpuart: don't break the on-going transfer when global reset lpuart_global_reset() shouldn't break the on-going transmit engine, need to recover the on-going data transfer after reset. This can help earlycon here, since commit 60f361722ad2 ("serial: fsl_lpuart: Reset prior to registration") moved lpuart_global_reset() before uart_add_one_port(), earlycon is writing during global reset, as global reset will disable the TX and clear the baud rate register, which caused the earlycon cannot work any more after reset, needs to restore the baud rate and re-enable the transmitter to recover the earlycon write. Also move the lpuart_global_reset() down, then we can reuse the lpuart32_tx_empty() without declaration. Fixes: bd5305dcabbc ("tty: serial: fsl_lpuart: do software reset for imx7ulp and imx8qxp") Signed-off-by: Sherry Sun Link: https://lore.kernel.org/r/20221024085844.22786-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 76 +++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 67fa113f77d4..888e01fbd9c5 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -404,33 +405,6 @@ static unsigned int lpuart_get_baud_clk_rate(struct lpuart_port *sport) #define lpuart_enable_clks(x) __lpuart_enable_clks(x, true) #define lpuart_disable_clks(x) __lpuart_enable_clks(x, false) -static int lpuart_global_reset(struct lpuart_port *sport) -{ - struct uart_port *port = &sport->port; - void __iomem *global_addr; - int ret; - - if (uart_console(port)) - return 0; - - ret = clk_prepare_enable(sport->ipg_clk); - if (ret) { - dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); - return ret; - } - - if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { - global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; - writel(UART_GLOBAL_RST, global_addr); - usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); - writel(0, global_addr); - usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); - } - - clk_disable_unprepare(sport->ipg_clk); - return 0; -} - static void lpuart_stop_tx(struct uart_port *port) { unsigned char temp; @@ -2636,6 +2610,54 @@ static const struct serial_rs485 lpuart_rs485_supported = { /* delay_rts_* and RX_DURING_TX are not supported */ }; +static int lpuart_global_reset(struct lpuart_port *sport) +{ + struct uart_port *port = &sport->port; + void __iomem *global_addr; + unsigned long ctrl, bd; + unsigned int val = 0; + int ret; + + ret = clk_prepare_enable(sport->ipg_clk); + if (ret) { + dev_err(sport->port.dev, "failed to enable uart ipg clk: %d\n", ret); + return ret; + } + + if (is_imx7ulp_lpuart(sport) || is_imx8qxp_lpuart(sport)) { + /* + * If the transmitter is used by earlycon, wait for transmit engine to + * complete and then reset. + */ + ctrl = lpuart32_read(port, UARTCTRL); + if (ctrl & UARTCTRL_TE) { + bd = lpuart32_read(&sport->port, UARTBAUD); + if (read_poll_timeout(lpuart32_tx_empty, val, val, 1, 100000, false, + port)) { + dev_warn(sport->port.dev, + "timeout waiting for transmit engine to complete\n"); + clk_disable_unprepare(sport->ipg_clk); + return 0; + } + } + + global_addr = port->membase + UART_GLOBAL - IMX_REG_OFF; + writel(UART_GLOBAL_RST, global_addr); + usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); + writel(0, global_addr); + usleep_range(GLOBAL_RST_MIN_US, GLOBAL_RST_MAX_US); + + /* Recover the transmitter for earlycon. */ + if (ctrl & UARTCTRL_TE) { + lpuart32_write(port, bd, UARTBAUD); + lpuart32_write(port, ctrl, UARTCTRL); + } + } + + clk_disable_unprepare(sport->ipg_clk); + return 0; +} + static int lpuart_probe(struct platform_device *pdev) { const struct lpuart_soc_data *sdata = of_device_get_match_data(&pdev->dev); From 4561d8008a467cb05ac632a215391d6b787f40aa Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Wed, 12 Oct 2022 20:13:53 +0800 Subject: [PATCH 088/963] serial: imx: Add missing .thaw_noirq hook The following warning is seen with non-console UART instance when system hibernates. [ 37.371969] ------------[ cut here ]------------ [ 37.376599] uart3_root_clk already disabled [ 37.380810] WARNING: CPU: 0 PID: 296 at drivers/clk/clk.c:952 clk_core_disable+0xa4/0xb0 ... [ 37.506986] Call trace: [ 37.509432] clk_core_disable+0xa4/0xb0 [ 37.513270] clk_disable+0x34/0x50 [ 37.516672] imx_uart_thaw+0x38/0x5c [ 37.520250] platform_pm_thaw+0x30/0x6c [ 37.524089] dpm_run_callback.constprop.0+0x3c/0xd4 [ 37.528972] device_resume+0x7c/0x160 [ 37.532633] dpm_resume+0xe8/0x230 [ 37.536036] hibernation_snapshot+0x288/0x430 [ 37.540397] hibernate+0x10c/0x2e0 [ 37.543798] state_store+0xc4/0xd0 [ 37.547203] kobj_attr_store+0x1c/0x30 [ 37.550953] sysfs_kf_write+0x48/0x60 [ 37.554619] kernfs_fop_write_iter+0x118/0x1ac [ 37.559063] new_sync_write+0xe8/0x184 [ 37.562812] vfs_write+0x230/0x290 [ 37.566214] ksys_write+0x68/0xf4 [ 37.569529] __arm64_sys_write+0x20/0x2c [ 37.573452] invoke_syscall.constprop.0+0x50/0xf0 [ 37.578156] do_el0_svc+0x11c/0x150 [ 37.581648] el0_svc+0x30/0x140 [ 37.584792] el0t_64_sync_handler+0xe8/0xf0 [ 37.588976] el0t_64_sync+0x1a0/0x1a4 [ 37.592639] ---[ end trace 56e22eec54676d75 ]--- On hibernating, pm core calls into related hooks in sequence like: .freeze .freeze_noirq .thaw_noirq .thaw With .thaw_noirq hook being absent, the clock will be disabled in a unbalanced call which results the warning above. imx_uart_freeze() clk_prepare_enable() imx_uart_suspend_noirq() clk_disable() imx_uart_thaw clk_disable_unprepare() Adding the missing .thaw_noirq hook as imx_uart_resume_noirq() will have the call sequence corrected as below and thus fix the warning. imx_uart_freeze() clk_prepare_enable() imx_uart_suspend_noirq() clk_disable() imx_uart_resume_noirq() clk_enable() imx_uart_thaw clk_disable_unprepare() Fixes: 09df0b3464e5 ("serial: imx: fix endless loop during suspend") Reviewed-by: Martin Kaiser Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20221012121353.2346280-1-shawn.guo@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 05b432dc7a85..aadda66405b4 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -2594,6 +2594,7 @@ static const struct dev_pm_ops imx_uart_pm_ops = { .suspend_noirq = imx_uart_suspend_noirq, .resume_noirq = imx_uart_resume_noirq, .freeze_noirq = imx_uart_suspend_noirq, + .thaw_noirq = imx_uart_resume_noirq, .restore_noirq = imx_uart_resume_noirq, .suspend = imx_uart_suspend, .resume = imx_uart_resume, From acdab4cb4ba7e5f94d2b422ebd7bf4bf68178fb2 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sat, 8 Oct 2022 14:02:20 +0300 Subject: [PATCH 089/963] Revert "tty: n_gsm: avoid call of sleeping functions from atomic context" This reverts commit 902e02ea9385373ce4b142576eef41c642703955. The above commit is reverted as the usage of tx_mutex seems not to solve the problem described in 902e02ea9385 ("tty: n_gsm: avoid call of sleeping functions from atomic context") and just moves the bug to another place. Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Reviewed-by: Daniel Starke Link: https://lore.kernel.org/r/20221008110221.13645-2-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 53 +++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 5e516f5cac5a..b66e08f3d006 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -264,7 +264,7 @@ struct gsm_mux { bool constipated; /* Asked by remote to shut up */ bool has_devices; /* Devices were registered */ - struct mutex tx_mutex; + spinlock_t tx_lock; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 @@ -700,6 +700,7 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) struct gsm_msg *msg; u8 *dp; int ocr; + unsigned long flags; msg = gsm_data_alloc(gsm, addr, 0, control); if (!msg) @@ -721,10 +722,10 @@ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) gsm_print_packet("Q->", addr, cr, control, NULL, 0); - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); list_add_tail(&msg->list, &gsm->tx_ctrl_list); gsm->tx_bytes += msg->len; - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); gsmld_write_trigger(gsm); return 0; @@ -749,7 +750,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) spin_unlock_irqrestore(&dlci->lock, flags); /* Clear data packets in MUX write queue */ - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { if (msg->addr != addr) continue; @@ -757,7 +758,7 @@ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) list_del(&msg->list); kfree(msg); } - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** @@ -1043,9 +1044,10 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { - mutex_lock(&dlci->gsm->tx_mutex); + unsigned long flags; + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); __gsm_data_queue(dlci, msg); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /** @@ -1057,7 +1059,7 @@ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) * is data. Keep to the MRU of the mux. This path handles the usual tty * interface which is a byte stream with optional modem data. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) @@ -1117,7 +1119,7 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) * is data. Keep to the MRU of the mux. This path handles framed data * queued as skbuffs to the DLCI. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, @@ -1133,7 +1135,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, if (dlci->adaption == 4) overhead = 1; - /* dlci->skb is locked by tx_mutex */ + /* dlci->skb is locked by tx_lock */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) @@ -1187,7 +1189,7 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, * Push an empty frame in to the transmit queue to update the modem status * bits and to transmit an optional break. * - * Caller must hold the tx_mutex of the mux. + * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, @@ -1301,12 +1303,13 @@ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { + unsigned long flags; int sweep; if (dlci->constipated) return; - mutex_lock(&dlci->gsm->tx_mutex); + spin_lock_irqsave(&dlci->gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { @@ -1317,7 +1320,7 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) } if (sweep) gsm_dlci_data_sweep(dlci->gsm); - mutex_unlock(&dlci->gsm->tx_mutex); + spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /* @@ -2029,13 +2032,14 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) static void gsm_kick_timeout(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, kick_timeout.work); + unsigned long flags; int sent = 0; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ if (gsm->tx_bytes < TX_THRESH_LO) sent = gsm_dlci_data_sweep(gsm); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); if (sent && debug & DBG_DATA) pr_info("%s TX queue stalled\n", __func__); @@ -2565,7 +2569,6 @@ static void gsm_free_mux(struct gsm_mux *gsm) break; } } - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2637,7 +2640,6 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); - mutex_init(&gsm->tx_mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); @@ -2646,6 +2648,7 @@ static struct gsm_mux *gsm_alloc_mux(void) INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); + spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; @@ -2670,7 +2673,6 @@ static struct gsm_mux *gsm_alloc_mux(void) } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { - mutex_destroy(&gsm->tx_mutex); mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); @@ -2826,16 +2828,17 @@ static void gsmld_write_trigger(struct gsm_mux *gsm) static void gsmld_write_task(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); + unsigned long flags; int i, ret; /* All outstanding control channel and control messages and one data * frame is sent. */ ret = -ENODEV; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); if (gsm->tty) ret = gsm_data_kick(gsm); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); if (ret >= 0) for (i = 0; i < NUM_DLCI; i++) @@ -3042,6 +3045,7 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; + unsigned long flags; int space; int ret; @@ -3049,13 +3053,13 @@ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, return -ENODEV; ret = -ENOBUFS; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); return ret; } @@ -3352,13 +3356,14 @@ static struct tty_ldisc_ops tty_ldisc_packet = { static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { struct gsm_mux *gsm = dlci->gsm; + unsigned long flags; if (dlci->state != DLCI_OPEN || dlci->adaption != 2) return; - mutex_lock(&gsm->tx_mutex); + spin_lock_irqsave(&gsm->tx_lock, flags); gsm_dlci_modem_output(gsm, dlci, brk); - mutex_unlock(&gsm->tx_mutex); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** From 15743ae50e04aa907131e3ae8d66e9a2964ea232 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Sat, 8 Oct 2022 14:02:21 +0300 Subject: [PATCH 090/963] Revert "tty: n_gsm: replace kicktimer with delayed_work" This reverts commit c9ab053e56ce13a949977398c8edc12e6c02fc95. The above commit is reverted as it was a prerequisite for tx_mutex introduction and tx_mutex has been removed as it does not correctly work in order to protect tx data. Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Reviewed-by: Daniel Starke Link: https://lore.kernel.org/r/20221008110221.13645-3-pchelkin@ispras.ru Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index b66e08f3d006..5c9f76073fcd 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -272,7 +272,7 @@ struct gsm_mux { struct list_head tx_data_list; /* Pending data packets */ /* Control messages */ - struct delayed_work kick_timeout; /* Kick TX queuing on timeout */ + struct timer_list kick_timer; /* Kick TX queuing on timeout */ struct timer_list t2_timer; /* Retransmit timer for commands */ int cretries; /* Command retry counter */ struct gsm_control *pending_cmd;/* Our current pending command */ @@ -1029,7 +1029,7 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) gsm->tx_bytes += msg->len; gsmld_write_trigger(gsm); - schedule_delayed_work(&gsm->kick_timeout, 10 * gsm->t1 * HZ / 100); + mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); } /** @@ -2022,16 +2022,16 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) } /** - * gsm_kick_timeout - transmit if possible - * @work: work contained in our gsm object + * gsm_kick_timer - transmit if possible + * @t: timer contained in our gsm object * * Transmit data from DLCIs if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. */ -static void gsm_kick_timeout(struct work_struct *work) +static void gsm_kick_timer(struct timer_list *t) { - struct gsm_mux *gsm = container_of(work, struct gsm_mux, kick_timeout.work); + struct gsm_mux *gsm = from_timer(gsm, t, kick_timer); unsigned long flags; int sent = 0; @@ -2496,7 +2496,7 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) } /* Finish outstanding timers, making sure they are done */ - cancel_delayed_work_sync(&gsm->kick_timeout); + del_timer_sync(&gsm->kick_timer); del_timer_sync(&gsm->t2_timer); /* Finish writing to ldisc */ @@ -2643,7 +2643,7 @@ static struct gsm_mux *gsm_alloc_mux(void) kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); - INIT_DELAYED_WORK(&gsm->kick_timeout, gsm_kick_timeout); + timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); From 7b7dfe4833c70a11cdfa51b38705103bd31eddaa Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 2 Oct 2022 12:07:09 +0800 Subject: [PATCH 091/963] tty: n_gsm: fix sleep-in-atomic-context bug in gsm_control_send The function gsm_dlci_t1() is a timer handler that runs in an atomic context, but it calls "kzalloc(..., GFP_KERNEL)" that may sleep. As a result, the sleep-in-atomic-context bug will happen. The process is shown below: gsm_dlci_t1() gsm_dlci_open() gsm_modem_update() gsm_modem_upd_via_msc() gsm_control_send() kzalloc(sizeof(.., GFP_KERNEL) //may sleep This patch changes the gfp_t parameter of kzalloc() from GFP_KERNEL to GFP_ATOMIC in order to mitigate the bug. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20221002040709.27849-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 5c9f76073fcd..b6e0cc4571ea 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1711,7 +1711,7 @@ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, unsigned int command, u8 *data, int clen) { struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), - GFP_KERNEL); + GFP_ATOMIC); unsigned long flags; if (ctrl == NULL) return NULL; From c7e37cc6240767f794678d11704935d49cc81d59 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 3 Nov 2022 11:31:43 -0700 Subject: [PATCH 092/963] Input: goodix - try resetting the controller when no config is set On ACPI systems (irq_pin_access_method == IRQ_PIN_ACCESS_ACPI_*) the driver does not reset the controller at probe time, because sometimes the system firmware loads a config and resetting might loose this config. On the Nanote UMPC-01 device OTOH the config is in flash of the controller, the controller needs a reset to load this; and the system firmware does not reset the controller on a cold boot. To fix the Nanote UMPC-01 touchscreen not working on a cold boot, try resetting the controller and then re-reading the config when encountering a config with 0 width/height/max_touch_num value and the controller has not already been reset by goodix_ts_probe(). This should be safe to do in general because normally we should never encounter a config with 0 width/height/max_touch_num. Doing this in general not only avoids the need for a DMI quirk, but also might help other systems. Signed-off-by: Hans de Goede Reviewed-by: Bastien Nocera Link: https://lore.kernel.org/r/20221025122930.421377-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/goodix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index a33cc7950cf5..c281e49826c2 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -1158,6 +1158,7 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) input_set_abs_params(ts->input_dev, ABS_MT_WIDTH_MAJOR, 0, 255, 0, 0); input_set_abs_params(ts->input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0); +retry_read_config: /* Read configuration and apply touchscreen parameters */ goodix_read_config(ts); @@ -1165,6 +1166,16 @@ static int goodix_configure_dev(struct goodix_ts_data *ts) touchscreen_parse_properties(ts->input_dev, true, &ts->prop); if (!ts->prop.max_x || !ts->prop.max_y || !ts->max_touch_num) { + if (!ts->reset_controller_at_probe && + ts->irq_pin_access_method != IRQ_PIN_ACCESS_NONE) { + dev_info(&ts->client->dev, "Config not set, resetting controller\n"); + /* Retry after a controller reset */ + ts->reset_controller_at_probe = true; + error = goodix_reset(ts); + if (error) + return error; + goto retry_read_config; + } dev_err(&ts->client->dev, "Invalid config (%d, %d, %d), using defaults\n", ts->prop.max_x, ts->prop.max_y, ts->max_touch_num); From 392cc13c5ec72ccd6bbfb1bc2339502cc59dd285 Mon Sep 17 00:00:00 2001 From: Jason Montleon Date: Thu, 3 Nov 2022 10:46:11 -0400 Subject: [PATCH 093/963] ASoC: rt5514: fix legacy dai naming Starting with 6.0-rc1 these messages are logged and the sound card is unavailable. Adding legacy_dai_naming to the rt5514-spi causes it to function properly again. [ 16.928454] kbl_r5514_5663_max kbl_r5514_5663_max: ASoC: CPU DAI spi-PRP0001:00 not registered [ 16.928561] platform kbl_r5514_5663_max: deferred probe pending Fixes: fc34ece41f71 ("ASoC: Refactor non_legacy_dai_naming flag") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=216641 Signed-off-by: Jason Montleon Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221103144612.4431-1-jmontleo@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5514-spi.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/rt5514-spi.c b/sound/soc/codecs/rt5514-spi.c index 1a25a3787935..362663abcb89 100644 --- a/sound/soc/codecs/rt5514-spi.c +++ b/sound/soc/codecs/rt5514-spi.c @@ -298,13 +298,14 @@ static int rt5514_spi_pcm_new(struct snd_soc_component *component, } static const struct snd_soc_component_driver rt5514_spi_component = { - .name = DRV_NAME, - .probe = rt5514_spi_pcm_probe, - .open = rt5514_spi_pcm_open, - .hw_params = rt5514_spi_hw_params, - .hw_free = rt5514_spi_hw_free, - .pointer = rt5514_spi_pcm_pointer, - .pcm_construct = rt5514_spi_pcm_new, + .name = DRV_NAME, + .probe = rt5514_spi_pcm_probe, + .open = rt5514_spi_pcm_open, + .hw_params = rt5514_spi_hw_params, + .hw_free = rt5514_spi_hw_free, + .pointer = rt5514_spi_pcm_pointer, + .pcm_construct = rt5514_spi_pcm_new, + .legacy_dai_naming = 1, }; /** From a1dca8774faf3f77eb34fa0ac6f3e2b82290b1e4 Mon Sep 17 00:00:00 2001 From: Jason Montleon Date: Thu, 3 Nov 2022 10:46:12 -0400 Subject: [PATCH 094/963] ASoC: rt5677: fix legacy dai naming Starting with 6.0-rc1 the CPU DAI is not registered and the sound card is unavailable. Adding legacy_dai_naming causes it to function properly again. Fixes: fc34ece41f71 ("ASoC: Refactor non_legacy_dai_naming flag") Signed-off-by: Jason Montleon Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221103144612.4431-2-jmontleo@redhat.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5677-spi.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/rt5677-spi.c b/sound/soc/codecs/rt5677-spi.c index 8f3993a4c1cc..d25703dd7499 100644 --- a/sound/soc/codecs/rt5677-spi.c +++ b/sound/soc/codecs/rt5677-spi.c @@ -396,15 +396,16 @@ static int rt5677_spi_pcm_probe(struct snd_soc_component *component) } static const struct snd_soc_component_driver rt5677_spi_dai_component = { - .name = DRV_NAME, - .probe = rt5677_spi_pcm_probe, - .open = rt5677_spi_pcm_open, - .close = rt5677_spi_pcm_close, - .hw_params = rt5677_spi_hw_params, - .hw_free = rt5677_spi_hw_free, - .prepare = rt5677_spi_prepare, - .pointer = rt5677_spi_pcm_pointer, - .pcm_construct = rt5677_spi_pcm_new, + .name = DRV_NAME, + .probe = rt5677_spi_pcm_probe, + .open = rt5677_spi_pcm_open, + .close = rt5677_spi_pcm_close, + .hw_params = rt5677_spi_hw_params, + .hw_free = rt5677_spi_hw_free, + .prepare = rt5677_spi_prepare, + .pointer = rt5677_spi_pcm_pointer, + .pcm_construct = rt5677_spi_pcm_new, + .legacy_dai_naming = 1, }; /* Select a suitable transfer command for the next transfer to ensure From d3fd203f36d46aa29600a72d57a1b61af80e4a25 Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Wed, 2 Nov 2022 16:16:20 +0800 Subject: [PATCH 095/963] bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb() We got a syzkaller problem because of aarch64 alignment fault if KFENCE enabled. When the size from user bpf program is an odd number, like 399, 407, etc, it will cause the struct skb_shared_info's unaligned access. As seen below: BUG: KFENCE: use-after-free read in __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 Use-after-free read at 0xffff6254fffac077 (in kfence-#213): __lse_atomic_add arch/arm64/include/asm/atomic_lse.h:26 [inline] arch_atomic_add arch/arm64/include/asm/atomic.h:28 [inline] arch_atomic_inc include/linux/atomic-arch-fallback.h:270 [inline] atomic_inc include/asm-generic/atomic-instrumented.h:241 [inline] __skb_clone+0x23c/0x2a0 net/core/skbuff.c:1032 skb_clone+0xf4/0x214 net/core/skbuff.c:1481 ____bpf_clone_redirect net/core/filter.c:2433 [inline] bpf_clone_redirect+0x78/0x1c0 net/core/filter.c:2420 bpf_prog_d3839dd9068ceb51+0x80/0x330 bpf_dispatcher_nop_func include/linux/bpf.h:728 [inline] bpf_test_run+0x3c0/0x6c0 net/bpf/test_run.c:53 bpf_prog_test_run_skb+0x638/0xa7c net/bpf/test_run.c:594 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 kfence-#213: 0xffff6254fffac000-0xffff6254fffac196, size=407, cache=kmalloc-512 allocated by task 15074 on cpu 0 at 1342.585390s: kmalloc include/linux/slab.h:568 [inline] kzalloc include/linux/slab.h:675 [inline] bpf_test_init.isra.0+0xac/0x290 net/bpf/test_run.c:191 bpf_prog_test_run_skb+0x11c/0xa7c net/bpf/test_run.c:512 bpf_prog_test_run kernel/bpf/syscall.c:3148 [inline] __do_sys_bpf kernel/bpf/syscall.c:4441 [inline] __se_sys_bpf+0xad0/0x1634 kernel/bpf/syscall.c:4381 __arm64_sys_bpf+0x50/0x60 kernel/bpf/syscall.c:4381 To fix the problem, we adjust @size so that (@size + @hearoom) is a multiple of SMP_CACHE_BYTES. So we make sure the struct skb_shared_info is aligned to a cache line. Fixes: 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command") Signed-off-by: Baisong Zhong Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Link: https://lore.kernel.org/bpf/20221102081620.1465154-1-zhongbaisong@huawei.com --- net/bpf/test_run.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 13d578ce2a09..fcb3e6c5e03c 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -774,6 +774,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, if (user_size > size) return ERR_PTR(-EMSGSIZE); + size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) return ERR_PTR(-ENOMEM); From 4cc47e8add635408e063c98b52d56b7ceacf0b70 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 3 Nov 2022 11:30:30 -0700 Subject: [PATCH 096/963] clk: qcom: gdsc: Remove direct runtime PM calls We shouldn't be calling runtime PM APIs from within the genpd enable/disable path for a couple reasons. First, this causes an AA lockdep splat[1] because genpd can call into genpd code again while holding the genpd lock. WARNING: possible recursive locking detected 5.19.0-rc2-lockdep+ #7 Not tainted -------------------------------------------- kworker/2:1/49 is trying to acquire lock: ffffffeea0370788 (&genpd->mlock){+.+.}-{3:3}, at: genpd_lock_mtx+0x24/0x30 but task is already holding lock: ffffffeea03710a8 (&genpd->mlock){+.+.}-{3:3}, at: genpd_lock_mtx+0x24/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&genpd->mlock); lock(&genpd->mlock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/2:1/49: #0: 74ffff80811a5748 ((wq_completion)pm){+.+.}-{0:0}, at: process_one_work+0x320/0x5fc #1: ffffffc008537cf8 ((work_completion)(&genpd->power_off_work)){+.+.}-{0:0}, at: process_one_work+0x354/0x5fc #2: ffffffeea03710a8 (&genpd->mlock){+.+.}-{3:3}, at: genpd_lock_mtx+0x24/0x30 stack backtrace: CPU: 2 PID: 49 Comm: kworker/2:1 Not tainted 5.19.0-rc2-lockdep+ #7 Hardware name: Google Lazor (rev3 - 8) with KB Backlight (DT) Workqueue: pm genpd_power_off_work_fn Call trace: dump_backtrace+0x1a0/0x200 show_stack+0x24/0x30 dump_stack_lvl+0x7c/0xa0 dump_stack+0x18/0x44 __lock_acquire+0xb38/0x3634 lock_acquire+0x180/0x2d4 __mutex_lock_common+0x118/0xe30 mutex_lock_nested+0x70/0x7c genpd_lock_mtx+0x24/0x30 genpd_runtime_suspend+0x2f0/0x414 __rpm_callback+0xdc/0x1b8 rpm_callback+0x4c/0xcc rpm_suspend+0x21c/0x5f0 rpm_idle+0x17c/0x1e0 __pm_runtime_idle+0x78/0xcc gdsc_disable+0x24c/0x26c _genpd_power_off+0xd4/0x1c4 genpd_power_off+0x2d8/0x41c genpd_power_off_work_fn+0x60/0x94 process_one_work+0x398/0x5fc worker_thread+0x42c/0x6c4 kthread+0x194/0x1b4 ret_from_fork+0x10/0x20 Second, this confuses runtime PM on CoachZ for the camera devices by causing the camera clock controller's runtime PM usage_count to go negative after resuming from suspend. This is because runtime PM is being used on the clock controller while runtime PM is disabled for the device. The reason for the negative count is because a GDSC is represented as a genpd and each genpd that is attached to a device is resumed during the noirq phase of system wide suspend/resume (see the noirq suspend ops assignment in pm_genpd_init() for more details). The camera GDSCs are attached to camera devices with the 'power-domains' property in DT. Every device has runtime PM disabled in the late system suspend phase via __device_suspend_late(). Runtime PM is not usable until runtime PM is enabled in device_resume_early(). The noirq phases run after the 'late' and before the 'early' phase of suspend/resume. When the genpds are resumed in genpd_resume_noirq(), we call down into gdsc_enable() that calls pm_runtime_resume_and_get() and that returns -EACCES to indicate failure to resume because runtime PM is disabled for all devices. Upon closer inspection, calling runtime PM APIs like this in the GDSC driver doesn't make sense. It was intended to make sure the GDSC for the clock controller providing other GDSCs was enabled, specifically the MMCX GDSC for the display clk controller on SM8250 (sm8250-dispcc), so that GDSC register accesses succeeded. That will already happen because we make the 'dev->pm_domain' a parent domain of each GDSC we register in gdsc_register() via pm_genpd_add_subdomain(). When any of these GDSCs are accessed, we'll enable the parent domain (in this specific case MMCX). We also remove any getting of runtime PM during registration, because when a genpd is registered it increments the count on the parent if the genpd itself is already enabled. Cc: Dmitry Baryshkov Cc: Johan Hovold Cc: Ulf Hansson Cc: Taniya Das Cc: Satya Priya Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Cc: Matthias Kaehlcke Reported-by: Stephen Boyd Link: https://lore.kernel.org/r/CAE-0n52xbZeJ66RaKwggeRB57fUAwjvxGxfFMKOKJMKVyFTe+w@mail.gmail.com [1] Fixes: 1b771839de05 ("clk: qcom: gdsc: enable optional power domain support") Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20221103183030.3594899-1-swboyd@chromium.org Tested-by: Johan Hovold Reviewed-by: Johan Hovold Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gdsc.c | 61 ++++------------------------------------- drivers/clk/qcom/gdsc.h | 2 -- 2 files changed, 6 insertions(+), 57 deletions(-) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7cf5e130e92f..0f21a8a767ac 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -56,22 +55,6 @@ enum gdsc_status { GDSC_ON }; -static int gdsc_pm_runtime_get(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_resume_and_get(sc->dev); -} - -static int gdsc_pm_runtime_put(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_put_sync(sc->dev); -} - /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. @@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { From 18acb7fac22ff7b36c7ea5a76b12996e7b7dbaba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:13 +0100 Subject: [PATCH 097/963] bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because __attribute__((patchable_function_entry)) is only available since GCC-8 this solution fails to build on the minimum required GCC version. Undo these changes so we might try again -- without cluttering up the patches with too many changes. This is an almost complete revert of: dbe69b299884 ("bpf: Fix dispatcher patchable function entry to 5 bytes nop") ceea991a019c ("bpf: Move bpf_dispatcher function out of ftrace locations") (notably the arch/x86/Kconfig hunk is kept). Reported-by: David Laight Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/439d8dc735bb4858875377df67f1b29a@AcuMS.aculab.com Link: https://lore.kernel.org/bpf/20221103120647.728830733@infradead.org --- arch/x86/net/bpf_jit_comp.c | 13 ------------- include/linux/bpf.h | 21 +-------------------- kernel/bpf/dispatcher.c | 6 ------ 3 files changed, 1 insertion(+), 39 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 00127abd89ee..99620428ad78 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -389,18 +388,6 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, return ret; } -int __init bpf_arch_init_dispatcher_early(void *ip) -{ - const u8 *nop_insn = x86_nops[5]; - - if (is_endbr(*(u32 *)ip)) - ip += ENDBR_INSN_SIZE; - - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); - return 0; -} - int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0566705c1d4e..5cd95716b441 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,7 +27,6 @@ #include #include #include -#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -971,8 +970,6 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); -int __init bpf_arch_init_dispatcher_early(void *ip); - #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -986,21 +983,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); }, \ } -#define BPF_DISPATCHER_INIT_CALL(_name) \ - static int __init _name##_init(void) \ - { \ - return bpf_arch_init_dispatcher_early(_name##_func); \ - } \ - early_initcall(_name##_init) - -#ifdef CONFIG_X86_64 -#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) -#else -#define BPF_DISPATCHER_ATTRIBUTES -#endif - #define DEFINE_BPF_DISPATCHER(name) \ - notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ @@ -1010,9 +993,7 @@ int __init bpf_arch_init_dispatcher_early(void *ip); } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); - + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 04f0a045dcaa..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,7 +4,6 @@ #include #include #include -#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -91,11 +90,6 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n return -ENOTSUPP; } -int __weak __init bpf_arch_init_dispatcher_early(void *ip) -{ - return -ENOTSUPP; -} - static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; From c86df29d11dfba27c0a1f5039cd6fe387fbf4239 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Nov 2022 13:00:14 +0100 Subject: [PATCH 098/963] bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dispatcher function is currently abusing the ftrace __fentry__ call location for its own purposes -- this obviously gives trouble when the dispatcher and ftrace are both in use. A previous solution tried using __attribute__((patchable_function_entry())) which works, except it is GCC-8+ only, breaking the build on the earlier still supported compilers. Instead use static_call() -- which has its own annotations and does not conflict with ftrace -- to rewrite the dispatch function. By using: return static_call()(ctx, insni, bpf_func) you get a perfect forwarding tail call as function body (iow a single jmp instruction). By having the default static_call() target be bpf_dispatcher_nop_func() it retains the default behaviour (an indirect call to the argument function). Only once a dispatcher program is attached is the target rewritten to directly call the JIT'ed image. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Tested-by: Jiri Olsa Acked-by: Björn Töpel Acked-by: Jiri Olsa Link: https://lkml.kernel.org/r/Y1/oBlK0yFk5c/Im@hirez.programming.kicks-ass.net Link: https://lore.kernel.org/bpf/20221103120647.796772565@infradead.org --- include/linux/bpf.h | 39 ++++++++++++++++++++++++++++++++++++++- kernel/bpf/dispatcher.c | 20 +++++++------------- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5cd95716b441..74c6f449d81e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -27,6 +27,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -953,6 +954,10 @@ struct bpf_dispatcher { void *rw_image; u32 image_off; struct bpf_ksym ksym; +#ifdef CONFIG_HAVE_STATIC_CALL + struct static_call_key *sc_key; + void *sc_tramp; +#endif }; static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( @@ -970,6 +975,34 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); + +/* + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn + * indirection with a direct call to the bpf program. If the architecture does + * not have STATIC_CALL, avoid a double-indirection. + */ +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __BPF_DISPATCHER_SC_INIT(_name) \ + .sc_key = &STATIC_CALL_KEY(_name), \ + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), + +#define __BPF_DISPATCHER_SC(name) \ + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) + +#define __BPF_DISPATCHER_CALL(name) \ + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) + +#define __BPF_DISPATCHER_UPDATE(_d, _new) \ + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) + +#else +#define __BPF_DISPATCHER_SC_INIT(name) +#define __BPF_DISPATCHER_SC(name) +#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) +#define __BPF_DISPATCHER_UPDATE(_d, _new) +#endif + #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -981,25 +1014,29 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func .name = #_name, \ .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ }, \ + __BPF_DISPATCHER_SC_INIT(_name##_call) \ } #define DEFINE_BPF_DISPATCHER(name) \ + __BPF_DISPATCHER_SC(name); \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func) \ { \ - return bpf_func(ctx, insnsi); \ + return __BPF_DISPATCHER_CALL(name); \ } \ EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ struct bpf_dispatcher bpf_dispatcher_##name = \ BPF_DISPATCHER_INIT(bpf_dispatcher_##name); + #define DECLARE_BPF_DISPATCHER(name) \ unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ bpf_func_t bpf_func); \ extern struct bpf_dispatcher bpf_dispatcher_##name; + #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index fa64b80b8bca..7dfb8d0d5202 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -4,6 +4,7 @@ #include #include #include +#include /* The BPF dispatcher is a multiway branch code generator. The * dispatcher is a mechanism to avoid the performance penalty of an @@ -104,17 +105,11 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *b static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new, *tmp; - u32 noff; - int err; + void *new, *tmp; + u32 noff = 0; - if (!prev_num_progs) { - old = NULL; - noff = 0; - } else { - old = d->image + d->image_off; + if (prev_num_progs) noff = d->image_off ^ (PAGE_SIZE / 2); - } new = d->num_progs ? d->image + noff : NULL; tmp = d->num_progs ? d->rw_image + noff : NULL; @@ -128,11 +123,10 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); - if (err || !new) - return; + __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); - d->image_off = noff; + if (new) + d->image_off = noff; } void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, From 0aa60ff5d996d4ecdd4a62699c01f6d00f798d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Concepci=C3=B3n=20Rodr=C3=ADguez?= Date: Sun, 6 Nov 2022 01:56:51 +0000 Subject: [PATCH 099/963] iio: light: apds9960: fix wrong register for gesture gain Gesture Gain Control is in REG_GCONF_2 (0xa3), not in REG_CONFIG_2 (0x90). Fixes: aff268cd532e ("iio: light: add APDS9960 ALS + promixity driver") Signed-off-by: Alejandro Concepcion-Rodriguez Acked-by: Matt Ranostay Cc: Link: https://lore.kernel.org/r/EaT-NKC-H4DNX5z4Lg9B6IWPD5TrTrYBr5DYB784wfDKQkTmzPXkoYqyUOrOgJH-xvTsEkFLcVkeAPZRUODEFI5dGziaWXwjpfBNLeNGfNc=@acoro.eu Signed-off-by: Jonathan Cameron --- drivers/iio/light/apds9960.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c index b62c139baf41..38d4c7644bef 100644 --- a/drivers/iio/light/apds9960.c +++ b/drivers/iio/light/apds9960.c @@ -54,9 +54,6 @@ #define APDS9960_REG_CONTROL_PGAIN_MASK_SHIFT 2 #define APDS9960_REG_CONFIG_2 0x90 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK 0x60 -#define APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT 5 - #define APDS9960_REG_ID 0x92 #define APDS9960_REG_STATUS 0x93 @@ -77,6 +74,9 @@ #define APDS9960_REG_GCONF_1_GFIFO_THRES_MASK_SHIFT 6 #define APDS9960_REG_GCONF_2 0xa3 +#define APDS9960_REG_GCONF_2_GGAIN_MASK 0x60 +#define APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT 5 + #define APDS9960_REG_GOFFSET_U 0xa4 #define APDS9960_REG_GOFFSET_D 0xa5 #define APDS9960_REG_GPULSE 0xa6 @@ -396,9 +396,9 @@ static int apds9960_set_pxs_gain(struct apds9960_data *data, int val) } ret = regmap_update_bits(data->regmap, - APDS9960_REG_CONFIG_2, - APDS9960_REG_CONFIG_2_GGAIN_MASK, - idx << APDS9960_REG_CONFIG_2_GGAIN_MASK_SHIFT); + APDS9960_REG_GCONF_2, + APDS9960_REG_GCONF_2_GGAIN_MASK, + idx << APDS9960_REG_GCONF_2_GGAIN_MASK_SHIFT); if (!ret) data->pxs_gain = idx; mutex_unlock(&data->lock); From 314d34fe7f0a5836cb0472950c1f17744b4efde8 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Mon, 31 Oct 2022 21:40:31 +0800 Subject: [PATCH 100/963] ASoC: soc-utils: Remove __exit for snd_soc_util_exit() snd_soc_util_exit() is called in __init snd_soc_init() for cleanup. Remove the __exit annotation for it to fix the build warning: WARNING: modpost: sound/soc/snd-soc-core.o: section mismatch in reference: init_module (section: .init.text) -> snd_soc_util_exit (section: .exit.text) Fixes: 6ec27c53886c ("ASoC: core: Fix use-after-free in snd_soc_exit()") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221031134031.256511-1-chenzhongjin@huawei.com Signed-off-by: Mark Brown --- sound/soc/soc-utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index a3b6df2378b4..a4dba0b751e7 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -264,7 +264,7 @@ int __init snd_soc_util_init(void) return ret; } -void __exit snd_soc_util_exit(void) +void snd_soc_util_exit(void) { platform_driver_unregister(&soc_dummy_driver); platform_device_unregister(soc_dummy_dev); From 3d59eaef49ca2db581156a7b77c9afc0546eefc0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 7 Nov 2022 11:04:33 +0200 Subject: [PATCH 101/963] ASoC: SOF: topology: No need to assign core ID if token parsing failed Move the return value check before attempting to assign the core ID to the swidget since we are going to fail the sof_widget_ready() and free up swidget anyways. Fixes: 909dadf21aae ("ASoC: SOF: topology: Make DAI widget parsing IPC agnostic") Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20221107090433.5146-1-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/topology.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 38855dd60617..6a0e7f3b5023 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -1344,16 +1344,6 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, break; } - if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { - swidget->core = SOF_DSP_PRIMARY_CORE; - } else { - int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, - swidget->num_tuples); - - if (core >= 0) - swidget->core = core; - } - /* check token parsing reply */ if (ret < 0) { dev_err(scomp->dev, @@ -1365,6 +1355,16 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index, return ret; } + if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) { + swidget->core = SOF_DSP_PRIMARY_CORE; + } else { + int core = sof_get_token_value(SOF_TKN_COMP_CORE_ID, swidget->tuples, + swidget->num_tuples); + + if (core >= 0) + swidget->core = core; + } + /* bind widget to external event */ if (tw->event_type) { if (widget_ops[w->id].bind_event) { From 89cdb224f2abe37ec4ac21ba0d9ddeb5a6a9cf68 Mon Sep 17 00:00:00 2001 From: Zhu Ning Date: Fri, 28 Oct 2022 10:04:56 +0800 Subject: [PATCH 102/963] ASoC: sof_es8336: reduce pop noise on speaker The Speaker GPIO needs to be turned on slightly behind the codec turned on. It also need to be turned off slightly before the codec turned down. Current code uses delay in DAPM_EVENT to do it but the mdelay delays the DAPM itself and thus has no effect. A delayed_work is added to turn on the speaker. The Speaker is turned off in .trigger since trigger is called slightly before the DAPM events. Signed-off-by: Zhu Ning ------------ v1: cancel delayed work while disabling speaker. Link: https://lore.kernel.org/r/20221028020456.90286-1-zhuning0077@gmail.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_es8336.c | 60 ++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/sound/soc/intel/boards/sof_es8336.c b/sound/soc/intel/boards/sof_es8336.c index fbb42e54947a..70713e4b07dc 100644 --- a/sound/soc/intel/boards/sof_es8336.c +++ b/sound/soc/intel/boards/sof_es8336.c @@ -63,6 +63,7 @@ struct sof_es8336_private { struct snd_soc_jack jack; struct list_head hdmi_pcm_list; bool speaker_en; + struct delayed_work pcm_pop_work; }; struct sof_hdmi_pcm { @@ -111,6 +112,46 @@ static void log_quirks(struct device *dev) dev_info(dev, "quirk headset at mic1 port enabled\n"); } +static void pcm_pop_work_events(struct work_struct *work) +{ + struct sof_es8336_private *priv = + container_of(work, struct sof_es8336_private, pcm_pop_work.work); + + gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en); + + if (quirk & SOF_ES8336_HEADPHONE_GPIO) + gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en); + +} + +static int sof_8336_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_card *card = rtd->card; + struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + case SNDRV_PCM_TRIGGER_RESUME: + break; + + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_STOP: + if (priv->speaker_en == false) + if (substream->stream == 0) { + cancel_delayed_work(&priv->pcm_pop_work); + gpiod_set_value_cansleep(priv->gpio_speakers, true); + } + break; + default: + return -EINVAL; + } + + return 0; +} + static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { @@ -122,19 +163,7 @@ static int sof_es8316_speaker_power_event(struct snd_soc_dapm_widget *w, priv->speaker_en = !SND_SOC_DAPM_EVENT_ON(event); - if (SND_SOC_DAPM_EVENT_ON(event)) - msleep(70); - - gpiod_set_value_cansleep(priv->gpio_speakers, priv->speaker_en); - - if (!(quirk & SOF_ES8336_HEADPHONE_GPIO)) - return 0; - - if (SND_SOC_DAPM_EVENT_ON(event)) - msleep(70); - - gpiod_set_value_cansleep(priv->gpio_headphone, priv->speaker_en); - + queue_delayed_work(system_wq, &priv->pcm_pop_work, msecs_to_jiffies(70)); return 0; } @@ -344,6 +373,7 @@ static int sof_es8336_hw_params(struct snd_pcm_substream *substream, /* machine stream operations */ static struct snd_soc_ops sof_es8336_ops = { .hw_params = sof_es8336_hw_params, + .trigger = sof_8336_trigger, }; static struct snd_soc_dai_link_component platform_component[] = { @@ -723,7 +753,8 @@ static int sof_es8336_probe(struct platform_device *pdev) } INIT_LIST_HEAD(&priv->hdmi_pcm_list); - + INIT_DELAYED_WORK(&priv->pcm_pop_work, + pcm_pop_work_events); snd_soc_card_set_drvdata(card, priv); if (mach->mach_params.dmic_num > 0) { @@ -752,6 +783,7 @@ static int sof_es8336_remove(struct platform_device *pdev) struct snd_soc_card *card = platform_get_drvdata(pdev); struct sof_es8336_private *priv = snd_soc_card_get_drvdata(card); + cancel_delayed_work(&priv->pcm_pop_work); gpiod_put(priv->gpio_speakers); device_remove_software_node(priv->codec_dev); put_device(priv->codec_dev); From 612695bccfdbd52004551308a55bae410e7cd22f Mon Sep 17 00:00:00 2001 From: Tomislav Novak Date: Mon, 26 Sep 2022 16:09:12 +0100 Subject: [PATCH 103/963] ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2 kernels Store the frame address where arm_get_current_stackframe() looks for it (ARM_r7 instead of ARM_fp if CONFIG_THUMB2_KERNEL=y). Otherwise frame->fp gets set to 0, causing unwind_frame() to fail. # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' Attaching 1 probe... @[ __schedule+1059 ]: 1 A typical first unwind instruction is 0x97 (SP = R7), so after executing it SP ends up being 0 and -URC_FAILURE is returned. unwind_frame(pc = ac9da7d7 lr = 00000000 sp = c69bdda0 fp = 00000000) unwind_find_idx(ac9da7d7) unwind_exec_insn: insn = 00000097 unwind_exec_insn: fp = 00000000 sp = 00000000 lr = 00000000 pc = 00000000 With this patch: # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' Attaching 1 probe... @[ __schedule+1059 __schedule+1059 schedule+79 schedule_hrtimeout_range_clock+163 schedule_hrtimeout_range+17 ep_poll+471 SyS_epoll_wait+111 sys_epoll_pwait+231 __ret_fast_syscall+1 ]: 1 Link: https://lore.kernel.org/r/20220920230728.2617421-1-tnovak@fb.com/ Reviewed-by: Linus Walleij Signed-off-by: Tomislav Novak Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index fe87397c3d8c..bdbc1e590891 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_arch_fetch_caller_regs(regs, __ip) { \ (regs)->ARM_pc = (__ip); \ - (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ + frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ (regs)->ARM_sp = current_stack_pointer; \ (regs)->ARM_cpsr = SVC_MODE; \ } From 340a982825f76f1cff0daa605970fe47321b5ee7 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Fri, 4 Nov 2022 21:46:18 +0100 Subject: [PATCH 104/963] ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation Actually in no-MMU SoCs(i.e. i.MXRT) ZERO_PAGE(vaddr) expands to ``` virt_to_page(0) ``` that in order expands to: ``` pfn_to_page(virt_to_pfn(0)) ``` and then virt_to_pfn(0) to: ``` ((((unsigned long)(0) - PAGE_OFFSET) >> PAGE_SHIFT) + PHYS_PFN_OFFSET) ``` where PAGE_OFFSET and PHYS_PFN_OFFSET are the DRAM offset(0x80000000) and PAGE_SHIFT is 12. This way we obtain 16MB(0x01000000) summed to the base of DRAM(0x80000000). When ZERO_PAGE(0) is then used, for example in bio_add_page(), the page gets an address that is out of DRAM bounds. So instead of using fake virtual page 0 let's allocate a dedicated zero_page during paging_init() and assign it to a global 'struct page * empty_zero_page' the same way mmu.c does and it's the same approach used in m68k with commit dc068f462179 as discussed here[0]. Then let's move ZERO_PAGE() definition to the top of pgtable.h to be in common between mmu.c and nommu.c. [0]: https://lore.kernel.org/linux-m68k/2a462b23-5b8e-bbf4-ec7d-778434a3b9d7@google.com/T/#m1266ceb63 ad140743174d6b3070364d3c9a5179b Signed-off-by: Giulio Benetti Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/pgtable-nommu.h | 6 ------ arch/arm/include/asm/pgtable.h | 16 +++++++++------- arch/arm/mm/nommu.c | 19 +++++++++++++++++++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d16aba48fa0a..090011394477 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -44,12 +44,6 @@ typedef pte_t *pte_addr_t; -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) - /* * Mark the prot value as uncacheable and unbufferable. */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 78a532068fec..ef48a55e9af8 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -10,6 +10,15 @@ #include #include +#ifndef __ASSEMBLY__ +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern struct page *empty_zero_page; +#define ZERO_PAGE(vaddr) (empty_zero_page) +#endif + #ifndef CONFIG_MMU #include @@ -139,13 +148,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, */ #ifndef __ASSEMBLY__ -/* - * ZERO_PAGE is a global shared page that is always zero: used - * for zero-mapped memory areas etc.. - */ -extern struct page *empty_zero_page; -#define ZERO_PAGE(vaddr) (empty_zero_page) - extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index c42debaded95..c1494a4dee25 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -26,6 +26,13 @@ unsigned long vectors_base; +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + #ifdef CONFIG_ARM_MPU struct mpu_rgn_info mpu_rgn_info; #endif @@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) */ void __init paging_init(const struct machine_desc *mdesc) { + void *zero_page; + early_trap_init((void *)vectors_base); mpu_setup(); + + /* allocate the zero page. */ + zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); } /* From 4854e8b083d800b421c02d3a8482fbfe27fb33ec Mon Sep 17 00:00:00 2001 From: Inbaraj E Date: Mon, 7 Nov 2022 18:13:37 +0530 Subject: [PATCH 105/963] dt-bindings: clock: exynosautov9: fix reference to CMU_FSYS1 Fix reference to CMU_FSYS1 mmc card clock to gout clock instead of dout. "gout" is output of a gate clock. Unlike any other clocks, the fsys1 mmc top clock does not have a divider, so it should be "mout -> gout" instead of "mout -> gout -> dout". This fixes make dtbs_check warning as shown below: arch/arm64/boot/dts/exynos/exynosautov9-sadk.dtb: clock-controller@17040000: clock-names:2: 'dout_clkcmu_fsys1_mmc_card' was expected Fixes: 4b6ec8d88623 ("dt-bindings: clock: exynosautov9: add schema for cmu_fsys0/1") Reviewed-by: Chanho Park Signed-off-by: Inbaraj E Link: https://lore.kernel.org/r/20221107124337.114135-1-inbaraj.e@samsung.com Signed-off-by: Krzysztof Kozlowski --- .../devicetree/bindings/clock/samsung,exynosautov9-clock.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml index 2ab4642679c0..55c4f94a14d1 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml @@ -148,7 +148,7 @@ allOf: items: - const: oscclk - const: dout_clkcmu_fsys1_bus - - const: dout_clkcmu_fsys1_mmc_card + - const: gout_clkcmu_fsys1_mmc_card - const: dout_clkcmu_fsys1_usbdrd - if: From bee55f2e7a44e7a7676e264b42f026e34bd244d9 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 18 Oct 2022 14:17:23 +0200 Subject: [PATCH 106/963] pinctrl: rockchip: list all pins in a possible mux route for PX30 The mux routes are incomplete for the PX30. This was discovered because we had a HW design using cif-clkoutm1 with the correct pinmux in the Device Tree but the clock would still not work. There are actually two muxing required: the pin muxing (performed by the usual Device Tree pinctrl nodes) and the "function" muxing (m0 vs m1; performed by the mux routing inside the driver). The pin muxing was correct but the function muxing was not. This adds the missing pins and their configuration for the mux routes that are already specified in the driver. Note that there are some "conflicts": it is possible *in Device Tree* to (attempt to) mux the pins for e.g. clkoutm1 and clkinm0 at the same time but this is actually not possible in hardware (because both share the same bit for the function muxing). Since it is an impossible hardware design, it is not deemed necessary to prevent the user from attempting to "misconfigure" the pins/functions. Fixes: 87065ca9b8e5 ("pinctrl: rockchip: Add pinctrl support for PX30") Signed-off-by: Quentin Schulz Link: https://lore.kernel.org/r/20221017-upstream-px30-cif-clkoutm1-v1-0-4ea1389237f7@theobroma-systems.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-rockchip.c | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 53bdfc40f055..da974ff2d75d 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -679,14 +679,54 @@ static void rockchip_get_recalced_mux(struct rockchip_pin_bank *bank, int pin, } static struct rockchip_mux_route_data px30_mux_route_data[] = { + RK_MUXROUTE_SAME(2, RK_PB4, 1, 0x184, BIT(16 + 7)), /* cif-d0m0 */ + RK_MUXROUTE_SAME(3, RK_PA1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d0m1 */ + RK_MUXROUTE_SAME(2, RK_PB6, 1, 0x184, BIT(16 + 7)), /* cif-d1m0 */ + RK_MUXROUTE_SAME(3, RK_PA2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d1m1 */ RK_MUXROUTE_SAME(2, RK_PA0, 1, 0x184, BIT(16 + 7)), /* cif-d2m0 */ RK_MUXROUTE_SAME(3, RK_PA3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d2m1 */ + RK_MUXROUTE_SAME(2, RK_PA1, 1, 0x184, BIT(16 + 7)), /* cif-d3m0 */ + RK_MUXROUTE_SAME(3, RK_PA5, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d3m1 */ + RK_MUXROUTE_SAME(2, RK_PA2, 1, 0x184, BIT(16 + 7)), /* cif-d4m0 */ + RK_MUXROUTE_SAME(3, RK_PA7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d4m1 */ + RK_MUXROUTE_SAME(2, RK_PA3, 1, 0x184, BIT(16 + 7)), /* cif-d5m0 */ + RK_MUXROUTE_SAME(3, RK_PB0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d5m1 */ + RK_MUXROUTE_SAME(2, RK_PA4, 1, 0x184, BIT(16 + 7)), /* cif-d6m0 */ + RK_MUXROUTE_SAME(3, RK_PB1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d6m1 */ + RK_MUXROUTE_SAME(2, RK_PA5, 1, 0x184, BIT(16 + 7)), /* cif-d7m0 */ + RK_MUXROUTE_SAME(3, RK_PB4, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d7m1 */ + RK_MUXROUTE_SAME(2, RK_PA6, 1, 0x184, BIT(16 + 7)), /* cif-d8m0 */ + RK_MUXROUTE_SAME(3, RK_PB6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d8m1 */ + RK_MUXROUTE_SAME(2, RK_PA7, 1, 0x184, BIT(16 + 7)), /* cif-d9m0 */ + RK_MUXROUTE_SAME(3, RK_PB7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d9m1 */ + RK_MUXROUTE_SAME(2, RK_PB7, 1, 0x184, BIT(16 + 7)), /* cif-d10m0 */ + RK_MUXROUTE_SAME(3, RK_PC6, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d10m1 */ + RK_MUXROUTE_SAME(2, RK_PC0, 1, 0x184, BIT(16 + 7)), /* cif-d11m0 */ + RK_MUXROUTE_SAME(3, RK_PC7, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-d11m1 */ + RK_MUXROUTE_SAME(2, RK_PB0, 1, 0x184, BIT(16 + 7)), /* cif-vsyncm0 */ + RK_MUXROUTE_SAME(3, RK_PD1, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-vsyncm1 */ + RK_MUXROUTE_SAME(2, RK_PB1, 1, 0x184, BIT(16 + 7)), /* cif-hrefm0 */ + RK_MUXROUTE_SAME(3, RK_PD2, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-hrefm1 */ + RK_MUXROUTE_SAME(2, RK_PB2, 1, 0x184, BIT(16 + 7)), /* cif-clkinm0 */ + RK_MUXROUTE_SAME(3, RK_PD3, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkinm1 */ + RK_MUXROUTE_SAME(2, RK_PB3, 1, 0x184, BIT(16 + 7)), /* cif-clkoutm0 */ + RK_MUXROUTE_SAME(3, RK_PD0, 3, 0x184, BIT(16 + 7) | BIT(7)), /* cif-clkoutm1 */ RK_MUXROUTE_SAME(3, RK_PC6, 2, 0x184, BIT(16 + 8)), /* pdm-m0 */ RK_MUXROUTE_SAME(2, RK_PC6, 1, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-m1 */ + RK_MUXROUTE_SAME(3, RK_PD3, 2, 0x184, BIT(16 + 8)), /* pdm-sdi0m0 */ + RK_MUXROUTE_SAME(2, RK_PC5, 2, 0x184, BIT(16 + 8) | BIT(8)), /* pdm-sdi0m1 */ RK_MUXROUTE_SAME(1, RK_PD3, 2, 0x184, BIT(16 + 10)), /* uart2-rxm0 */ RK_MUXROUTE_SAME(2, RK_PB6, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-rxm1 */ + RK_MUXROUTE_SAME(1, RK_PD2, 2, 0x184, BIT(16 + 10)), /* uart2-txm0 */ + RK_MUXROUTE_SAME(2, RK_PB4, 2, 0x184, BIT(16 + 10) | BIT(10)), /* uart2-txm1 */ RK_MUXROUTE_SAME(0, RK_PC1, 2, 0x184, BIT(16 + 9)), /* uart3-rxm0 */ RK_MUXROUTE_SAME(1, RK_PB7, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rxm1 */ + RK_MUXROUTE_SAME(0, RK_PC0, 2, 0x184, BIT(16 + 9)), /* uart3-txm0 */ + RK_MUXROUTE_SAME(1, RK_PB6, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-txm1 */ + RK_MUXROUTE_SAME(0, RK_PC2, 2, 0x184, BIT(16 + 9)), /* uart3-ctsm0 */ + RK_MUXROUTE_SAME(1, RK_PB4, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-ctsm1 */ + RK_MUXROUTE_SAME(0, RK_PC3, 2, 0x184, BIT(16 + 9)), /* uart3-rtsm0 */ + RK_MUXROUTE_SAME(1, RK_PB5, 2, 0x184, BIT(16 + 9) | BIT(9)), /* uart3-rtsm1 */ }; static struct rockchip_mux_route_data rv1126_mux_route_data[] = { From 7df140e84a75c89962feef659d686303d3ce75e5 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 21 Oct 2022 18:53:04 +0200 Subject: [PATCH 107/963] mtd: rawnand: qcom: handle ret from parse with codeword_fixup With use_codeword_fixup enabled, any return from mtd_device_parse_register gets overwritten. Aside from the clear bug, this is also problematic as a parser can EPROBE_DEFER and because this is not correctly handled, the nand is never rescanned later in the bootup process. An example of this problem is when smem requires additional time to be probed and nandc use qcomsmempart as parser. Parser will return EPROBE_DEFER but in the current code this ret gets overwritten by qcom_nand_host_parse_boot_partitions and qcom_nand_host_init_and_register return 0. Correctly handle the return code from mtd_device_parse_register so that any error from this function is not ignored. Fixes: 862bdedd7f4b ("mtd: nand: raw: qcom_nandc: add support for unprotected spare data pages") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Christian Marangi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221021165304.19991-1-ansuelsmth@gmail.com --- drivers/mtd/nand/raw/qcom_nandc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 8f80019a9f01..198a44794d2d 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -3167,16 +3167,18 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, ret = mtd_device_parse_register(mtd, probes, NULL, NULL, 0); if (ret) - nand_cleanup(chip); + goto err; if (nandc->props->use_codeword_fixup) { ret = qcom_nand_host_parse_boot_partitions(nandc, host, dn); - if (ret) { - nand_cleanup(chip); - return ret; - } + if (ret) + goto err; } + return 0; + +err: + nand_cleanup(chip); return ret; } From 83f0869e9bf3333d778d62f055b0f8e1de1cc812 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Mon, 24 Oct 2022 11:20:26 +0200 Subject: [PATCH 108/963] mtd: rawnand: placate "$VARIABLE is used uninitialized" warnings The compiler is not smart enough to notice that it's impossible for them to be actually used uninitialized. Which exact variables trip here varies depending on random surrounding code; none triggered in 6.1-rc1 but 6.1-rc2 fails on three of these five, despite variables declared in the very same line having identical flow. Signed-off-by: Adam Borowski Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221024092026.42123-1-kilobyte@angband.pl --- drivers/mtd/nand/raw/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 33f2c98a030e..c3cc66039925 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5834,7 +5834,7 @@ nand_match_ecc_req(struct nand_chip *chip, int req_step = requirements->step_size; int req_strength = requirements->strength; int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total; - int best_step, best_strength, best_ecc_bytes; + int best_step = 0, best_strength = 0, best_ecc_bytes = 0; int best_ecc_bytes_total = INT_MAX; int i, j; @@ -5915,7 +5915,7 @@ nand_maximize_ecc(struct nand_chip *chip, int step_size, strength, nsteps, ecc_bytes, corr; int best_corr = 0; int best_step = 0; - int best_strength, best_ecc_bytes; + int best_strength = 0, best_ecc_bytes = 0; int i, j; for (i = 0; i < caps->nstepinfos; i++) { From c717b9b7d6de9e024e47f7cd5bbff49f581d3db9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 7 Nov 2022 10:15:20 +0100 Subject: [PATCH 109/963] mtd: onenand: omap2: add dependency on GPMC OMAP2 OneNAND driver uses gpmc_omap_onenand_set_timings() provided by OMAP_GPMC driver, so the latter cannot be module if OneNAND driver is built-in: /usr/bin/arm-linux-gnueabi-ld: drivers/mtd/nand/onenand/onenand_omap2.o: in function `omap2_onenand_probe': onenand_omap2.c:(.text+0x520): undefined reference to `gpmc_omap_onenand_set_timings' The OMAP_GPMC is also a runtime dependency. Reported-by: kernel test robot Fixes: 854fd9209b20 ("memory: omap-gpmc: Allow building as a module") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Roger Quadros Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221107091520.127053-1-krzysztof.kozlowski@linaro.org --- drivers/mtd/nand/onenand/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/onenand/Kconfig b/drivers/mtd/nand/onenand/Kconfig index 34d9a7a82ad4..c94bf483541e 100644 --- a/drivers/mtd/nand/onenand/Kconfig +++ b/drivers/mtd/nand/onenand/Kconfig @@ -26,6 +26,7 @@ config MTD_ONENAND_OMAP2 tristate "OneNAND on OMAP2/OMAP3 support" depends on ARCH_OMAP2 || ARCH_OMAP3 || (COMPILE_TEST && ARM) depends on OF || COMPILE_TEST + depends on OMAP_GPMC help Support for a OneNAND flash device connected to an OMAP2/OMAP3 SoC via the GPMC memory controller. From f6b1a1cf1c3ee430d3f5e47847047ce789a690aa Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Sep 2022 20:04:34 +0800 Subject: [PATCH 110/963] ext4: fix use-after-free in ext4_ext_shift_extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the starting position of our insert range happens to be in the hole between the two ext4_extent_idx, because the lblk of the ext4_extent in the previous ext4_extent_idx is always less than the start, which leads to the "extent" variable access across the boundary, the following UAF is triggered: ================================================================== BUG: KASAN: use-after-free in ext4_ext_shift_extents+0x257/0x790 Read of size 4 at addr ffff88819807a008 by task fallocate/8010 CPU: 3 PID: 8010 Comm: fallocate Tainted: G E 5.10.0+ #492 Call Trace: dump_stack+0x7d/0xa3 print_address_description.constprop.0+0x1e/0x220 kasan_report.cold+0x67/0x7f ext4_ext_shift_extents+0x257/0x790 ext4_insert_range+0x5b6/0x700 ext4_fallocate+0x39e/0x3d0 vfs_fallocate+0x26f/0x470 ksys_fallocate+0x3a/0x70 __x64_sys_fallocate+0x4f/0x60 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ================================================================== For right shifts, we can divide them into the following situations: 1. When the first ee_block of ext4_extent_idx is greater than or equal to start, make right shifts directly from the first ee_block. 1) If it is greater than start, we need to continue searching in the previous ext4_extent_idx. 2) If it is equal to start, we can exit the loop (iterator=NULL). 2. When the first ee_block of ext4_extent_idx is less than start, then traverse from the last extent to find the first extent whose ee_block is less than start. 1) If extent is still the last extent after traversal, it means that the last ee_block of ext4_extent_idx is less than start, that is, start is located in the hole between idx and (idx+1), so we can exit the loop directly (break) without right shifts. 2) Otherwise, make right shifts at the corresponding position of the found extent, and then exit the loop (iterator=NULL). Fixes: 331573febb6a ("ext4: Add support FALLOC_FL_INSERT_RANGE for fallocate") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Zhihao Cheng Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20220922120434.1294789-1-libaokun1@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f1956288307f..6c399a8b22b3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5184,6 +5184,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * and it is decreased till we reach start. */ again: + ret = 0; if (SHIFT == SHIFT_LEFT) iterator = &start; else @@ -5227,14 +5228,21 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, ext4_ext_get_actual_len(extent); } else { extent = EXT_FIRST_EXTENT(path[depth].p_hdr); - if (le32_to_cpu(extent->ee_block) > 0) + if (le32_to_cpu(extent->ee_block) > start) *iterator = le32_to_cpu(extent->ee_block) - 1; - else - /* Beginning is reached, end of the loop */ + else if (le32_to_cpu(extent->ee_block) == start) iterator = NULL; - /* Update path extent in case we need to stop */ - while (le32_to_cpu(extent->ee_block) < start) + else { + extent = EXT_LAST_EXTENT(path[depth].p_hdr); + while (le32_to_cpu(extent->ee_block) >= start) + extent--; + + if (extent == EXT_LAST_EXTENT(path[depth].p_hdr)) + break; + extent++; + iterator = NULL; + } path[depth].p_ext = extent; } ret = ext4_ext_shift_path_extents(path, shift, inode, From b8ebf250997c5fb253582f42bfe98673801ebebd Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 7 Nov 2022 10:21:40 -0800 Subject: [PATCH 111/963] Input: iforce - invert valid length check when fetching device IDs syzbot is reporting uninitialized value at iforce_init_device() [1], for commit 6ac0aec6b0a6 ("Input: iforce - allow callers supply data buffer when fetching device IDs") is checking that valid length is shorter than bytes to read. Since iforce_get_id_packet() stores valid length when returning 0, the caller needs to check that valid length is longer than or equals to bytes to read. Reported-by: syzbot Signed-off-by: Tetsuo Handa Fixes: 6ac0aec6b0a6 ("Input: iforce - allow callers supply data buffer when fetching device IDs") Link: https://lore.kernel.org/r/531fb432-7396-ad37-ecba-3e42e7f56d5c@I-love.SAKURA.ne.jp Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/iforce/iforce-main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c index b86de1312512..84b87526b7ba 100644 --- a/drivers/input/joystick/iforce/iforce-main.c +++ b/drivers/input/joystick/iforce/iforce-main.c @@ -273,22 +273,22 @@ int iforce_init_device(struct device *parent, u16 bustype, * Get device info. */ - if (!iforce_get_id_packet(iforce, 'M', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'M', buf, &len) && len >= 3) input_dev->id.vendor = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet M\n"); - if (!iforce_get_id_packet(iforce, 'P', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'P', buf, &len) && len >= 3) input_dev->id.product = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet P\n"); - if (!iforce_get_id_packet(iforce, 'B', buf, &len) || len < 3) + if (!iforce_get_id_packet(iforce, 'B', buf, &len) && len >= 3) iforce->device_memory.end = get_unaligned_le16(buf + 1); else dev_warn(&iforce->dev->dev, "Device does not respond to id packet B\n"); - if (!iforce_get_id_packet(iforce, 'N', buf, &len) || len < 2) + if (!iforce_get_id_packet(iforce, 'N', buf, &len) && len >= 2) ff_effects = buf[1]; else dev_warn(&iforce->dev->dev, "Device does not respond to id packet N\n"); From 8e9ada1d0e72b4737df400fe1bba48dc42a68df7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Nov 2022 10:30:25 -0800 Subject: [PATCH 112/963] Input: soc_button_array - add use_low_level_irq module parameter It seems that the Windows drivers for the ACPI0011 soc_button_array device use low level triggered IRQs rather then using edge triggering. Some ACPI tables depend on this, directly poking the GPIO controller's registers to clear the trigger type when closing a laptop's/2-in-1's lid and re-instating the trigger when opening the lid again. Linux sets the edge/level on which to trigger to both low+high since it is using edge type IRQs, the ACPI tables then ends up also setting the bit for level IRQs and since both low and high level have been selected by Linux we get an IRQ storm leading to soft lockups. As a workaround for this the soc_button_array already contains a DMI quirk table with device models known to have this issue. Add a module parameter for this so that users can easily test if their device is affected too and so that they can use the module parameter as a workaround. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221106215320.67109-1-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 480476121c01..50497dd05027 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -18,6 +18,10 @@ #include #include +static bool use_low_level_irq; +module_param(use_low_level_irq, bool, 0444); +MODULE_PARM_DESC(use_low_level_irq, "Use low-level triggered IRQ instead of edge triggered"); + struct soc_button_info { const char *name; int acpi_index; @@ -164,7 +168,8 @@ soc_button_device_create(struct platform_device *pdev, } /* See dmi_use_low_level_irq[] comment */ - if (!autorepeat && dmi_check_system(dmi_use_low_level_irq)) { + if (!autorepeat && (use_low_level_irq || + dmi_check_system(dmi_use_low_level_irq))) { irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); gpio_keys[n_buttons].irq = irq; gpio_keys[n_buttons].gpio = -ENOENT; From e13757f52496444b994a7ac67b6e517a15d89bbc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Nov 2022 10:30:41 -0800 Subject: [PATCH 113/963] Input: soc_button_array - add Acer Switch V 10 to dmi_use_low_level_irq[] Like on the Acer Switch 10 SW5-012, the Acer Switch V 10 SW5-017's _LID method messes with home- and power-button GPIO IRQ settings, causing an IRQ storm. Add a quirk entry for the Acer Switch V 10 to the dmi_use_low_level_irq[] DMI quirk list, to use low-level IRQs on this model, fixing the IRQ storm. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221106215320.67109-2-hdegoede@redhat.com Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index 50497dd05027..09489380afda 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -77,6 +77,13 @@ static const struct dmi_system_id dmi_use_low_level_irq[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), }, }, + { + /* Acer Switch V 10 SW5-017, same issue as Acer Switch 10 SW5-012. */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + }, { /* * Acer One S1003. _LID method messes with power-button GPIO From a679120edfcf3d63f066f53afd425d51b480e533 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 7 Nov 2022 10:07:11 -0700 Subject: [PATCH 114/963] bpf: Add explicit cast to 'void *' for __BPF_DISPATCHER_UPDATE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with clang: kernel/bpf/dispatcher.c:126:33: error: pointer type mismatch ('void *' and 'unsigned int (*)(const void *, const struct bpf_insn *, bpf_func_t)' (aka 'unsigned int (*)(const void *, const struct bpf_insn *, unsigned int (*)(const void *, const struct bpf_insn *))')) [-Werror,-Wpointer-type-mismatch] __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~ ./include/linux/bpf.h:1045:54: note: expanded from macro '__BPF_DISPATCHER_UPDATE' __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) ^~~~ 1 error generated. The warning is pointing out that the type of new ('void *') and &bpf_dispatcher_nop_func are not compatible, which could have side effects coming out of a conditional operator due to promotion rules. Add the explicit cast to 'void *' to make it clear that this is expected, as __BPF_DISPATCHER_UPDATE() expands to a call to __static_call_update(), which expects a 'void *' as its final argument. Fixes: c86df29d11df ("bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)") Link: https://github.com/ClangBuiltLinux/linux/issues/1755 Reported-by: kernel test robot Reported-by: "kernelci.org bot" Signed-off-by: Nathan Chancellor Acked-by: Björn Töpel Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221107170711.42409-1-nathan@kernel.org Signed-off-by: Martin KaFai Lau --- kernel/bpf/dispatcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 7dfb8d0d5202..c19719f48ce0 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -123,7 +123,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) return; } - __BPF_DISPATCHER_UPDATE(d, new ?: &bpf_dispatcher_nop_func); + __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func); if (new) d->image_off = noff; From e5126de138caef0eedb3d6431059c0c5581a1a5d Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Fri, 21 Oct 2022 16:53:25 +0800 Subject: [PATCH 115/963] erofs: fix general protection fault when reading fragment As syzbot reported [1], the fragment feature sb flag is not set, so packed_inode != NULL needs to be checked in z_erofs_read_fragment(). [1] https://lore.kernel.org/all/0000000000002e7a8905eb841ddd@google.com/ Reported-by: syzbot+3faecbfd845a895c04cb@syzkaller.appspotmail.com Fixes: b15b2e307c3a ("erofs: support on-disk compressed fragments data") Signed-off-by: Yue Hu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20221021085325.25788-1-zbestahu@gmail.com Signed-off-by: Gao Xiang --- fs/erofs/zdata.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index c7f24fc7efd5..d6caf275be77 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -660,6 +660,9 @@ static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos, u8 *src, *dst; unsigned int i, cnt; + if (!packed_inode) + return -EFSCORRUPTED; + pos += EROFS_I(inode)->z_fragmentoff; for (i = 0; i < len; i += cnt) { cnt = min_t(unsigned int, len - i, From 75e43355cbe4d5948a79bd592f2ffecb9f75f75d Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Fri, 4 Nov 2022 13:40:27 +0800 Subject: [PATCH 116/963] erofs: put metabuf in error path in fscache mode For tail packing layout, put metabuf when error is encountered. Fixes: 1ae9470c3e14 ("erofs: clean up .read_folio() and .readahead() in fscache mode") Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Reviewed-by: Jia Zhu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20221104054028.52208-2-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index fe05bc51f9f2..83559008bfa8 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -287,8 +287,10 @@ static int erofs_fscache_data_read(struct address_space *mapping, return PTR_ERR(src); iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE); - if (copy_to_iter(src + offset, size, &iter) != size) + if (copy_to_iter(src + offset, size, &iter) != size) { + erofs_put_metabuf(&buf); return -EFAULT; + } iov_iter_zero(PAGE_SIZE - size, &iter); erofs_put_metabuf(&buf); return PAGE_SIZE; From e6d9f9ba111b56154f1b1120252aff269cebd49c Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Fri, 4 Nov 2022 13:40:28 +0800 Subject: [PATCH 117/963] erofs: get correct count for unmapped range in fscache mode For unmapped range, the returned map.m_llen is zero, and thus the calculated count is unexpected zero. Prior to the refactoring introduced by commit 1ae9470c3e14 ("erofs: clean up .read_folio() and .readahead() in fscache mode"), only the readahead routine suffers from this. With the refactoring of making .read_folio() and .readahead() calling one common routine, both read_folio and readahead have this issue now. Fix this by calculating count separately in unmapped condition. Fixes: c665b394b9e8 ("erofs: implement fscache-based data readahead") Fixes: 1ae9470c3e14 ("erofs: clean up .read_folio() and .readahead() in fscache mode") Signed-off-by: Jingbo Xu Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20221104054028.52208-3-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 83559008bfa8..260fa4737fc0 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -296,15 +296,16 @@ static int erofs_fscache_data_read(struct address_space *mapping, return PAGE_SIZE; } - count = min_t(size_t, map.m_llen - (pos - map.m_la), len); - DBG_BUGON(!count || count % PAGE_SIZE); - if (!(map.m_flags & EROFS_MAP_MAPPED)) { + count = len; iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count); iov_iter_zero(count, &iter); return count; } + count = min_t(size_t, map.m_llen - (pos - map.m_la), len); + DBG_BUGON(!count || count % PAGE_SIZE); + mdev = (struct erofs_map_dev) { .m_deviceid = map.m_deviceid, .m_pa = map.m_pa, From bb88f9695460bec25aa30ba9072595025cf6c8af Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 31 Oct 2022 10:35:13 +0100 Subject: [PATCH 118/963] perf: Improve missing SIGTRAP checking To catch missing SIGTRAP we employ a WARN in __perf_event_overflow(), which fires if pending_sigtrap was already set: returning to user space without consuming pending_sigtrap, and then having the event fire again would re-enter the kernel and trigger the WARN. This, however, seemed to miss the case where some events not associated with progress in the user space task can fire and the interrupt handler runs before the IRQ work meant to consume pending_sigtrap (and generate the SIGTRAP). syzbot gifted us this stack trace: | WARNING: CPU: 0 PID: 3607 at kernel/events/core.c:9313 __perf_event_overflow | Modules linked in: | CPU: 0 PID: 3607 Comm: syz-executor100 Not tainted 6.1.0-rc2-syzkaller-00073-g88619e77b33d #0 | Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 | RIP: 0010:__perf_event_overflow+0x498/0x540 kernel/events/core.c:9313 | <...> | Call Trace: | | perf_swevent_hrtimer+0x34f/0x3c0 kernel/events/core.c:10729 | __run_hrtimer kernel/time/hrtimer.c:1685 [inline] | __hrtimer_run_queues+0x1c6/0xfb0 kernel/time/hrtimer.c:1749 | hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811 | local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1096 [inline] | __sysvec_apic_timer_interrupt+0x17c/0x640 arch/x86/kernel/apic/apic.c:1113 | sysvec_apic_timer_interrupt+0x40/0xc0 arch/x86/kernel/apic/apic.c:1107 | asm_sysvec_apic_timer_interrupt+0x16/0x20 arch/x86/include/asm/idtentry.h:649 | <...> | In this case, syzbot produced a program with event type PERF_TYPE_SOFTWARE and config PERF_COUNT_SW_CPU_CLOCK. The hrtimer manages to fire again before the IRQ work got a chance to run, all while never having returned to user space. Improve the WARN to check for real progress in user space: approximate this by storing a 32-bit hash of the current IP into pending_sigtrap, and if an event fires while pending_sigtrap still matches the previous IP, we assume no progress (false negatives are possible given we could return to user space and trigger again on the same IP). Fixes: ca6c21327c6a ("perf: Fix missing SIGTRAPs") Reported-by: syzbot+b8ded3e2e2c6adde4990@syzkaller.appspotmail.com Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221031093513.3032814-1-elver@google.com --- kernel/events/core.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4ec3717003d5..884871427a94 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9306,14 +9306,27 @@ static int __perf_event_overflow(struct perf_event *event, } if (event->attr.sigtrap) { - /* - * Should not be able to return to user space without processing - * pending_sigtrap (kernel events can overflow multiple times). - */ - WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel); + unsigned int pending_id = 1; + + if (regs) + pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1; if (!event->pending_sigtrap) { - event->pending_sigtrap = 1; + event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); + } else if (event->attr.exclude_kernel) { + /* + * Should not be able to return to user space without + * consuming pending_sigtrap; with exceptions: + * + * 1. Where !exclude_kernel, events can overflow again + * in the kernel without returning to user space. + * + * 2. Events that can overflow again before the IRQ- + * work without user space progress (e.g. hrtimer). + * To approximate progress (with false negatives), + * check 32-bit hash of the current IP. + */ + WARN_ON_ONCE(event->pending_sigtrap != pending_id); } event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); From f04a2862f9c3f64962b8709c75d788efba6df26b Mon Sep 17 00:00:00 2001 From: Anjana Hari Date: Thu, 3 Nov 2022 11:10:51 -0700 Subject: [PATCH 119/963] pinctrl: qcom: sc8280xp: Rectify UFS reset pins UFS reset pin offsets are wrongly configured for SC8280XP, correcting the same for both UFS instances here. Signed-off-by: Anjana Hari Signed-off-by: Bjorn Andersson Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # QDrive3 Link: https://lore.kernel.org/r/20221103181051.26912-1-quic_bjorande@quicinc.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sc8280xp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c index aa2075390f3e..e96c00686a25 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c @@ -1873,8 +1873,8 @@ static const struct msm_pingroup sc8280xp_groups[] = { [225] = PINGROUP(225, hs3_mi2s, phase_flag, _, _, _, _, egpio), [226] = PINGROUP(226, hs3_mi2s, phase_flag, _, _, _, _, egpio), [227] = PINGROUP(227, hs3_mi2s, phase_flag, _, _, _, _, egpio), - [228] = UFS_RESET(ufs_reset, 0xf1004), - [229] = UFS_RESET(ufs1_reset, 0xf3004), + [228] = UFS_RESET(ufs_reset, 0xf1000), + [229] = UFS_RESET(ufs1_reset, 0xf3000), [230] = SDC_QDSD_PINGROUP(sdc2_clk, 0xe8000, 14, 6), [231] = SDC_QDSD_PINGROUP(sdc2_cmd, 0xe8000, 11, 3), [232] = SDC_QDSD_PINGROUP(sdc2_data, 0xe8000, 9, 0), From cd136706b4f925aa5d316642543babac90d45910 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Nov 2022 10:07:53 +0100 Subject: [PATCH 120/963] USB: bcma: Make GPIO explicitly optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What the code does is to not check the return value from devm_gpiod_get() and then avoid using an erroneous GPIO descriptor with IS_ERR_OR_NULL(). This will miss real errors from the GPIO core that should not be ignored, such as probe deferral. Instead request the GPIO as explicitly optional, which means that if it doesn't exist, the descriptor returned will be NULL. Then we can add error handling and also avoid just doing this on the device tree path, and simplify the site where the optional GPIO descriptor is used. There were some problems with cleaning up this GPIO descriptor use in the past, but this is the proper way to deal with it. Cc: Rafał Miłecki Cc: Chuhong Yuan Signed-off-by: Linus Walleij Cc: stable Link: https://lore.kernel.org/r/20221107090753.1404679-1-linus.walleij@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/bcma-hcd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 2df52f75f6b3..7558cc4d90cc 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -285,7 +285,7 @@ static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val) { struct bcma_hcd_device *usb_dev = bcma_get_drvdata(dev); - if (IS_ERR_OR_NULL(usb_dev->gpio_desc)) + if (!usb_dev->gpio_desc) return; gpiod_set_value(usb_dev->gpio_desc, val); @@ -406,9 +406,11 @@ static int bcma_hcd_probe(struct bcma_device *core) return -ENOMEM; usb_dev->core = core; - if (core->dev.of_node) - usb_dev->gpio_desc = devm_gpiod_get(&core->dev, "vcc", - GPIOD_OUT_HIGH); + usb_dev->gpio_desc = devm_gpiod_get_optional(&core->dev, "vcc", + GPIOD_OUT_HIGH); + if (IS_ERR(usb_dev->gpio_desc)) + return dev_err_probe(&core->dev, PTR_ERR(usb_dev->gpio_desc), + "error obtaining VCC GPIO"); switch (core->id.id) { case BCMA_CORE_USB20_HOST: From ffb9da4a04c69567bad717707b6fdfbc4c216ef4 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 7 Nov 2022 18:45:44 -0800 Subject: [PATCH 121/963] usb: dwc3: gadget: Return -ESHUTDOWN on ep disable The usb_request API clearly noted that removed requests due to disabled endpoint should have -ESHUTDOWN status returned. Don't change this behavior. Fixes: b44c0e7fef51 ("usb: dwc3: gadget: conditionally remove requests") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/3421859485cb32d77e2068549679a6c07a7797bc.1667875427.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 5fe2d136dff5..026d4029bda6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1029,7 +1029,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } - dwc3_remove_requests(dwc, dep, -ECONNRESET); + dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; dep->type = 0; From 5c294de36e7fb3e0cba0c4e1ef9a5f57bc080d0f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 3 Nov 2022 15:46:48 +0100 Subject: [PATCH 122/963] Revert "usb: dwc3: disable USB core PHY management" This reverts commit 6000b8d900cd5f52fbcd0776d0cc396e88c8c2ea. The offending commit disabled the USB core PHY management as the dwc3 already manages the PHYs in question. Unfortunately some platforms have started relying on having USB core also controlling the PHY and this is specifically currently needed on some Exynos platforms for PHY calibration or connected device may fail to enumerate. The PHY calibration was previously handled in the dwc3 driver, but to work around some issues related to how the dwc3 driver interacts with xhci (e.g. using multiple drivers) this was moved to USB core by commits 34c7ed72f4f0 ("usb: core: phy: add support for PHY calibration") and a0a465569b45 ("usb: dwc3: remove generic PHY calibrate() calls"). The same PHY obviously should not be controlled from two different places, which for example do no agree on the PHY mode or power state during suspend, but as the offending patch was backported to stable, let's revert it for now. Reported-by: Stefan Agner Link: https://lore.kernel.org/lkml/808bdba846bb60456adf10a3016911ee@agner.ch/ Fixes: 6000b8d900cd ("usb: dwc3: disable USB core PHY management") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Tested-by: Marek Szyprowski Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20221103144648.14197-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/host.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index a7154fe8206d..f6f13e7f1ba1 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -11,13 +11,8 @@ #include #include -#include "../host/xhci-plat.h" #include "core.h" -static const struct xhci_plat_priv dwc3_xhci_plat_priv = { - .quirks = XHCI_SKIP_PHY_INIT, -}; - static void dwc3_host_fill_xhci_irq_res(struct dwc3 *dwc, int irq, char *name) { @@ -97,11 +92,6 @@ int dwc3_host_init(struct dwc3 *dwc) goto err; } - ret = platform_device_add_data(xhci, &dwc3_xhci_plat_priv, - sizeof(dwc3_xhci_plat_priv)); - if (ret) - goto err; - memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); if (dwc->usb3_lpm_capable) From 341fd15e2e18c24d5c738496cfc3d7a272241201 Mon Sep 17 00:00:00 2001 From: Yassine Oudjana Date: Mon, 7 Nov 2022 18:33:17 +0300 Subject: [PATCH 123/963] extcon: usbc-tusb320: Call the Type-C IRQ handler only if a port is registered Commit bf7571c00dca ("extcon: usbc-tusb320: Add USB TYPE-C support") added an optional Type-C interface to the driver but missed to check if it is in use when calling the IRQ handler. This causes an oops on devices currently using the old extcon interface. Check if a Type-C port is registered before calling the Type-C IRQ handler. Fixes: bf7571c00dca ("extcon: usbc-tusb320: Add USB TYPE-C support") Signed-off-by: Yassine Oudjana Reviewed-by: Marek Vasut Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221107153317.657803-1-y.oudjana@protonmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/extcon/extcon-usbc-tusb320.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c index 41041ff0fadb..2a120d8d3c27 100644 --- a/drivers/extcon/extcon-usbc-tusb320.c +++ b/drivers/extcon/extcon-usbc-tusb320.c @@ -327,7 +327,13 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id) return IRQ_NONE; tusb320_extcon_irq_handler(priv, reg); - tusb320_typec_irq_handler(priv, reg); + + /* + * Type-C support is optional. Only call the Type-C handler if a + * port had been registered previously. + */ + if (priv->port) + tusb320_typec_irq_handler(priv, reg); regmap_write(priv->regmap, TUSB320_REG9, reg); From 40bf8f162d0f95e0716e479d7db41443d931765c Mon Sep 17 00:00:00 2001 From: Rajat Khandelwal Date: Mon, 24 Oct 2022 22:46:11 +0530 Subject: [PATCH 124/963] usb: typec: mux: Enter safe mode only when pins need to be reconfigured There is no point to enter safe mode during DP/TBT configuration if the DP/TBT was already configured in mux. This is because safe mode is only applicable when there is a need to reconfigure the pins in order to avoid damage within/to port partner. In some chrome systems, IOM/mux is already configured before OS comes up. Thus, when driver is probed, it blindly enters safe mode due to PD negotiations but only after gfx driver lowers dp_phy_ownership, will the IOM complete safe mode and send an ack to PMC. Since, that never happens, we see IPC timeout. Hence, allow safe mode only when pin reconfiguration is not required, which makes sense. Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect") Cc: stable Signed-off-by: Rajat Khandelwal Signed-off-by: Lee Shawn C Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221024171611.181468-1-rajat.khandelwal@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index e1f4df7238bf..fdbf3694e21f 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -369,13 +369,24 @@ pmc_usb_mux_usb4(struct pmc_usb_port *port, struct typec_mux_state *state) return pmc_usb_command(port, (void *)&req, sizeof(req)); } -static int pmc_usb_mux_safe_state(struct pmc_usb_port *port) +static int pmc_usb_mux_safe_state(struct pmc_usb_port *port, + struct typec_mux_state *state) { u8 msg; if (IOM_PORT_ACTIVITY_IS(port->iom_status, SAFE_MODE)) return 0; + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, DP) || + IOM_PORT_ACTIVITY_IS(port->iom_status, DP_MFD)) && + state->alt && state->alt->svid == USB_TYPEC_DP_SID) + return 0; + + if ((IOM_PORT_ACTIVITY_IS(port->iom_status, TBT) || + IOM_PORT_ACTIVITY_IS(port->iom_status, ALT_MODE_TBT_USB)) && + state->alt && state->alt->svid == USB_TYPEC_TBT_SID) + return 0; + msg = PMC_USB_SAFE_MODE; msg |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; @@ -443,7 +454,7 @@ pmc_usb_mux_set(struct typec_mux_dev *mux, struct typec_mux_state *state) return 0; if (state->mode == TYPEC_STATE_SAFE) - return pmc_usb_mux_safe_state(port); + return pmc_usb_mux_safe_state(port, state); if (state->mode == TYPEC_STATE_USB) return pmc_usb_connect(port, port->role); From 6d8fc203b28ff8f6115fbe5eaf584de8b824f4fa Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Wed, 2 Nov 2022 17:15:42 +0100 Subject: [PATCH 125/963] usb: typec: tipd: Prevent uninitialized event{1,2} in IRQ handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If reading TPS_REG_INT_EVENT1/2 fails in the interrupt handler event1 and event2 may be uninitialized when they are used to determine IRQ_HANDLED vs. IRQ_NONE in the error path. Fixes: c7260e29dd20 ("usb: typec: tipd: Add short-circuit for no irqs") Fixes: 45188f27b3d0 ("usb: typec: tipd: Add support for Apple CD321X") Cc: stable Signed-off-by: Sven Peter Reviewed-by: Eric Curtin Reviewed-by: Heikki Krogerus Reviewed-by: Guido Günther Link: https://lore.kernel.org/r/20221102161542.30669-1-sven@svenpeter.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tipd/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index b637e8b378b3..2a77bab948f5 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -474,7 +474,7 @@ static void tps6598x_handle_plug_event(struct tps6598x *tps, u32 status) static irqreturn_t cd321x_interrupt(int irq, void *data) { struct tps6598x *tps = data; - u64 event; + u64 event = 0; u32 status; int ret; @@ -519,8 +519,8 @@ static irqreturn_t cd321x_interrupt(int irq, void *data) static irqreturn_t tps6598x_interrupt(int irq, void *data) { struct tps6598x *tps = data; - u64 event1; - u64 event2; + u64 event1 = 0; + u64 event2 = 0; u32 status; int ret; From eb86559a691cea5fa63e57a03ec3dc9c31e97955 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 8 Nov 2022 13:11:31 +0800 Subject: [PATCH 126/963] bpf: Fix memory leaks in __check_func_call kmemleak reports this issue: unreferenced object 0xffff88817139d000 (size 2048): comm "test_progs", pid 33246, jiffies 4307381979 (age 45851.820s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000045f075f0>] kmalloc_trace+0x27/0xa0 [<0000000098b7c90a>] __check_func_call+0x316/0x1230 [<00000000b4c3c403>] check_helper_call+0x172e/0x4700 [<00000000aa3875b7>] do_check+0x21d8/0x45e0 [<000000001147357b>] do_check_common+0x767/0xaf0 [<00000000b5a595b4>] bpf_check+0x43e3/0x5bc0 [<0000000011e391b1>] bpf_prog_load+0xf26/0x1940 [<0000000007f765c0>] __sys_bpf+0xd2c/0x3650 [<00000000839815d6>] __x64_sys_bpf+0x75/0xc0 [<00000000946ee250>] do_syscall_64+0x3b/0x90 [<0000000000506b7f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The root case here is: In function prepare_func_exit(), the callee is not released in the abnormal scenario after "state->curframe--;". To fix, move "state->curframe--;" to the very bottom of the function, right when we free callee and reset frame[] pointer to NULL, as Andrii suggested. In addition, function __check_func_call() has a similar problem. In the abnormal scenario before "state->curframe++;", the callee also should be released by free_func_state(). Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper") Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/1667884291-15666-1-git-send-email-wangyufen@huawei.com Signed-off-by: Martin KaFai Lau --- kernel/bpf/verifier.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 225666307bba..264b3dc714cc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6745,11 +6745,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn /* Transfer references to the callee */ err = copy_reference_state(callee, caller); if (err) - return err; + goto err_out; err = set_callee_state_cb(env, caller, callee, *insn_idx); if (err) - return err; + goto err_out; clear_caller_saved_regs(env, caller->regs); @@ -6766,6 +6766,11 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn print_verifier_state(env, callee, true); } return 0; + +err_out: + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return err; } int map_set_for_each_callback_args(struct bpf_verifier_env *env, @@ -6979,8 +6984,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return -EINVAL; } - state->curframe--; - caller = state->frame[state->curframe]; + caller = state->frame[state->curframe - 1]; if (callee->in_callback_fn) { /* enforce R0 return value range [0, 1]. */ struct tnum range = callee->callback_ret_range; @@ -7019,7 +7023,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) } /* clear everything in the callee */ free_func_state(callee); - state->frame[state->curframe + 1] = NULL; + state->frame[state->curframe--] = NULL; return 0; } From 26c263bf1847d4dadba016a0457c4c5f446407bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 8 Nov 2022 09:30:52 -0800 Subject: [PATCH 127/963] Input: i8042 - apply probe defer to more ASUS ZenBook models There are yet a few more ASUS ZenBook models that require the deferred probe. At least, there are different ZenBook UX325x and UX425x models. Let's extend the DMI matching table entries for adapting those missing models. Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20221108142027.28480-1-tiwai@suse.de Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-acpipnpio.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 0778dc03cd9e..46f8a694291e 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -115,18 +115,18 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_NEVER) }, { - /* ASUS ZenBook UX425UA */ + /* ASUS ZenBook UX425UA/QA */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425"), }, .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, { - /* ASUS ZenBook UM325UA */ + /* ASUS ZenBook UM325UA/QA */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325"), }, .driver_data = (void *)(SERIO_QUIRK_PROBE_DEFER | SERIO_QUIRK_RESET_NEVER) }, From d68cc25b7c7fb3034c5a5b5f350a0b858c6d5a45 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Sun, 6 Nov 2022 22:48:04 +0100 Subject: [PATCH 128/963] usb: dwc3: Do not get extcon device when usb-role-switch is used The change breaks device tree based platforms with PHY device and use usb-role-switch instead of an extcon switch. extcon_find_edev_by_node() will return EPROBE_DEFER if it can not find a device so probing without an extcon device will be deferred indefinitely. Fix this by explicitly checking for usb-role-switch. At least the out-of-tree USB3 support on Apple silicon based platforms using dwc3 with tipd USB Type-C and PD controller is affected by this issue. Fixes: d182c2e1bc92 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") Cc: stable@kernel.org Signed-off-by: Janne Grunau Acked-by: Thinh Nguyen Reviewed-by: Sven Peter Link: https://lore.kernel.org/r/20221106214804.2814-1-j@jannau.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index c0e7c76dc5c8..1f348bc867c2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1710,6 +1710,16 @@ static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc) if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) return extcon_get_extcon_dev(name); + /* + * Check explicitly if "usb-role-switch" is used since + * extcon_find_edev_by_node() can not be used to check the absence of + * an extcon device. In the absence of an device it will always return + * EPROBE_DEFER. + */ + if (IS_ENABLED(CONFIG_USB_ROLE_SWITCH) && + device_property_read_bool(dev, "usb-role-switch")) + return NULL; + /* * Try to get an extcon device from the USB PHY controller's "port" * node. Check if it has the "port" node first, to avoid printing the From f0861f49bd946ff94fce4f82509c45e167f63690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Borys=20Pop=C5=82awski?= Date: Wed, 5 Oct 2022 00:59:03 +0200 Subject: [PATCH 129/963] x86/sgx: Add overflow check in sgx_validate_offset_length() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sgx_validate_offset_length() function verifies "offset" and "length" arguments provided by userspace, but was missing an overflow check on their addition. Add it. Fixes: c6d26d370767 ("x86/sgx: Add SGX_IOC_ENCLAVE_ADD_PAGES") Signed-off-by: Borys Popławski Signed-off-by: Borislav Petkov Reviewed-by: Jarkko Sakkinen Cc: stable@vger.kernel.org # v5.11+ Link: https://lore.kernel.org/r/0d91ac79-6d84-abed-5821-4dbe59fa1a38@invisiblethingslab.com --- arch/x86/kernel/cpu/sgx/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index ebe79d60619f..da8b8ea6b063 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -356,6 +356,9 @@ static int sgx_validate_offset_length(struct sgx_encl *encl, if (!length || !IS_ALIGNED(length, PAGE_SIZE)) return -EINVAL; + if (offset + length < offset) + return -EINVAL; + if (offset + length - PAGE_SIZE >= encl->size) return -EINVAL; From 8950f345a67d8046d2472dd6ea81fa18ef5b4844 Mon Sep 17 00:00:00 2001 From: Dominik Haller Date: Tue, 11 Oct 2022 16:31:15 +0200 Subject: [PATCH 130/963] ARM: dts: am335x-pcm-953: Define fixed regulators in root node Remove the regulators node and define fixed regulators in the root node. Prevents the sdhci-omap driver from waiting in probe deferral forever because of the missing vmmc-supply and keeps am335x-pcm-953 consistent with the other Phytec AM335 boards. Fixes: bb07a829ec38 ("ARM: dts: Add support for phyCORE-AM335x PCM-953 carrier board") Signed-off-by: Dominik Haller Message-Id: <20221011143115.248003-1-d.haller@phytec.de> Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pcm-953.dtsi | 28 +++++++++++++-------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/am335x-pcm-953.dtsi b/arch/arm/boot/dts/am335x-pcm-953.dtsi index dae448040a97..947497413977 100644 --- a/arch/arm/boot/dts/am335x-pcm-953.dtsi +++ b/arch/arm/boot/dts/am335x-pcm-953.dtsi @@ -12,22 +12,20 @@ / { compatible = "phytec,am335x-pcm-953", "phytec,am335x-phycore-som", "ti,am33xx"; /* Power */ - regulators { - vcc3v3: fixedregulator@1 { - compatible = "regulator-fixed"; - regulator-name = "vcc3v3"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - regulator-boot-on; - }; + vcc3v3: fixedregulator1 { + compatible = "regulator-fixed"; + regulator-name = "vcc3v3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-boot-on; + }; - vcc1v8: fixedregulator@2 { - compatible = "regulator-fixed"; - regulator-name = "vcc1v8"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - }; + vcc1v8: fixedregulator2 { + compatible = "regulator-fixed"; + regulator-name = "vcc1v8"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; }; /* User IO */ From fed74d75277da865da9ba334d3f5d5e3e327971d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 4 Nov 2022 11:56:05 +0100 Subject: [PATCH 131/963] pinctrl: mediatek: common-v2: Fix bias-disable for PULL_PU_PD_RSEL_TYPE In pinctrl-paris we're calling the .bias_set_combo() callback when we are asked to set the pin bias to either pull up/down or pull disable. On newer platforms, this callback is mtk_pinconf_bias_set_combo(), located in pinctrl-mtk-common-v2.c: this will check the "pull type" assigned to the requested pin and in case said pin's pull type is MTK_PULL_PU_PD_RSEL_TYPE, this function will set RSEL first, PUPD last, which is fine. The issue comes when we're requesting PIN_CONFIG_BIAS_DISABLE, as this does *not* require setting RSEL but only PU_PD: in this case, the arg is MTK_DISABLE (zero), which is not a supported RSEL, due to which function mtk_pinconf_bias_set_rsel() returns a failure; because of that, mtk_pinconf_bias_set_pu_pd() is never called, hence the pin bias is never set to DISABLE. To fix this issue, add a check to mtk_pinconf_bias_set_rsel(): if we are entering that function with no pullup requested and at the same time the arg is MTK_DISABLE, this means that we're trying to disable pin bias, hence it's safe to return cleanly without ever setting any RSEL register. This makes mtk_pinconf_bias_set_combo() happy, going on with setting the PU_PD registers, which is the only action to actually take to disable bias on a pin/pingroup. Fixes: fb34a9ae383a ("pinctrl: mediatek: support rsel feature") Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221104105605.33720-1-angelogioacchino.delregno@collabora.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index e1ae3beb9f72..b7921b59eb7b 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -709,6 +709,9 @@ static int mtk_pinconf_bias_set_rsel(struct mtk_pinctrl *hw, { int err, rsel_val; + if (!pullup && arg == MTK_DISABLE) + return 0; + if (hw->rsel_si_unit) { /* find pin rsel_index from pin_rsel array*/ err = mtk_hw_pin_rsel_lookup(hw, desc, pullup, arg, &rsel_val); From 7a58b8d6021426b796eebfae80983374d9a80a75 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sun, 18 Sep 2022 11:33:12 +0800 Subject: [PATCH 132/963] usb: chipidea: fix deadlock in ci_otg_del_timer There is a deadlock in ci_otg_del_timer(), the process is shown below: (thread 1) | (thread 2) ci_otg_del_timer() | ci_otg_hrtimer_func() ... | spin_lock_irqsave() //(1) | ... ... | hrtimer_cancel() | spin_lock_irqsave() //(2) (block forever) We hold ci->lock in position (1) and use hrtimer_cancel() to wait ci_otg_hrtimer_func() to stop, but ci_otg_hrtimer_func() also need ci->lock in position (2). As a result, the hrtimer_cancel() in ci_otg_del_timer() will be blocked forever. This patch extracts hrtimer_cancel() from the protection of spin_lock_irqsave() in order that the ci_otg_hrtimer_func() could obtain the ci->lock. What`s more, there will be no race happen. Because the "next_timer" is always under the protection of spin_lock_irqsave() and we only check whether "next_timer" equals to NUM_OTG_FSM_TIMERS in the following code. Fixes: 3a316ec4c91c ("usb: chipidea: use hrtimer for otg fsm timers") Cc: stable Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220918033312.94348-1-duoming@zju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg_fsm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/otg_fsm.c b/drivers/usb/chipidea/otg_fsm.c index ada78daba6df..c17516c29b63 100644 --- a/drivers/usb/chipidea/otg_fsm.c +++ b/drivers/usb/chipidea/otg_fsm.c @@ -256,8 +256,10 @@ static void ci_otg_del_timer(struct ci_hdrc *ci, enum otg_fsm_timer t) ci->enabled_otg_timer_bits &= ~(1 << t); if (ci->next_otg_timer == t) { if (ci->enabled_otg_timer_bits == 0) { + spin_unlock_irqrestore(&ci->lock, flags); /* No enabled timers after delete it */ hrtimer_cancel(&ci->otg_fsm_hrtimer); + spin_lock_irqsave(&ci->lock, flags); ci->next_otg_timer = NUM_OTG_FSM_TIMERS; } else { /* Find the next timer */ From 9d5333c931347005352d5b8beaa43528c94cfc9c Mon Sep 17 00:00:00 2001 From: Li Jun Date: Wed, 26 Oct 2022 15:07:49 -0400 Subject: [PATCH 133/963] usb: cdns3: host: fix endless superspeed hub port reset When usb 3.0 hub connect with one USB 2.0 device and NO USB 3.0 device, some usb hub reports endless port reset message. [ 190.324169] usb 2-1: new SuperSpeed USB device number 88 using xhci-hcd [ 190.352834] hub 2-1:1.0: USB hub found [ 190.356995] hub 2-1:1.0: 4 ports detected [ 190.700056] usb 2-1: USB disconnect, device number 88 [ 192.472139] usb 2-1: new SuperSpeed USB device number 89 using xhci-hcd [ 192.500820] hub 2-1:1.0: USB hub found [ 192.504977] hub 2-1:1.0: 4 ports detected [ 192.852066] usb 2-1: USB disconnect, device number 89 The reason is the runtime pm state of USB2.0 port is active and USB 3.0 port is suspend, so parent device is active state. cat /sys/bus/platform/devices/5b110000.usb/5b130000.usb/xhci-hcd.1.auto/usb2/power/runtime_status suspended cat /sys/bus/platform/devices/5b110000.usb/5b130000.usb/xhci-hcd.1.auto/usb1/power/runtime_status active cat /sys/bus/platform/devices/5b110000.usb/5b130000.usb/xhci-hcd.1.auto/power/runtime_status active cat /sys/bus/platform/devices/5b110000.usb/5b130000.usb/power/runtime_status active So xhci_cdns3_suspend_quirk() have not called. U3 configure is not applied. move U3 configure into host start. Reinit again in resume function in case controller power lost during suspend. Cc: stable@vger.kernel.org 5.10 Signed-off-by: Li Jun Signed-off-by: Frank Li Reviewed-by: Peter Chen Acked-by: Alexander Stein Link: https://lore.kernel.org/r/20221026190749.2280367-1-Frank.Li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/host.c | 56 ++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c index 9643b905e2d8..6164fc4c96a4 100644 --- a/drivers/usb/cdns3/host.c +++ b/drivers/usb/cdns3/host.c @@ -24,11 +24,37 @@ #define CFG_RXDET_P3_EN BIT(15) #define LPM_2_STB_SWITCH_EN BIT(25) -static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd); +static void xhci_cdns3_plat_start(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + u32 value; + + /* set usbcmd.EU3S */ + value = readl(&xhci->op_regs->command); + value |= CMD_PM_INDEX; + writel(value, &xhci->op_regs->command); + + if (hcd->regs) { + value = readl(hcd->regs + XECP_AUX_CTRL_REG1); + value |= CFG_RXDET_P3_EN; + writel(value, hcd->regs + XECP_AUX_CTRL_REG1); + + value = readl(hcd->regs + XECP_PORT_CAP_REG); + value |= LPM_2_STB_SWITCH_EN; + writel(value, hcd->regs + XECP_PORT_CAP_REG); + } +} + +static int xhci_cdns3_resume_quirk(struct usb_hcd *hcd) +{ + xhci_cdns3_plat_start(hcd); + return 0; +} static const struct xhci_plat_priv xhci_plat_cdns3_xhci = { .quirks = XHCI_SKIP_PHY_INIT | XHCI_AVOID_BEI, - .suspend_quirk = xhci_cdns3_suspend_quirk, + .plat_start = xhci_cdns3_plat_start, + .resume_quirk = xhci_cdns3_resume_quirk, }; static int __cdns_host_init(struct cdns *cdns) @@ -90,32 +116,6 @@ static int __cdns_host_init(struct cdns *cdns) return ret; } -static int xhci_cdns3_suspend_quirk(struct usb_hcd *hcd) -{ - struct xhci_hcd *xhci = hcd_to_xhci(hcd); - u32 value; - - if (pm_runtime_status_suspended(hcd->self.controller)) - return 0; - - /* set usbcmd.EU3S */ - value = readl(&xhci->op_regs->command); - value |= CMD_PM_INDEX; - writel(value, &xhci->op_regs->command); - - if (hcd->regs) { - value = readl(hcd->regs + XECP_AUX_CTRL_REG1); - value |= CFG_RXDET_P3_EN; - writel(value, hcd->regs + XECP_AUX_CTRL_REG1); - - value = readl(hcd->regs + XECP_PORT_CAP_REG); - value |= LPM_2_STB_SWITCH_EN; - writel(value, hcd->regs + XECP_PORT_CAP_REG); - } - - return 0; -} - static void cdns_host_exit(struct cdns *cdns) { kfree(cdns->xhci_plat_data); From bdfe34597139cfcecd47a2eb97fea44d77157491 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 8 Sep 2022 10:33:15 +0530 Subject: [PATCH 134/963] perf/x86/amd/uncore: Fix memory leak for events array When a CPU comes online, the per-CPU NB and LLC uncore contexts are freed but not the events array within the context structure. This causes a memory leak as identified by the kmemleak detector. [...] unreferenced object 0xffff8c5944b8e320 (size 32): comm "swapper/0", pid 1, jiffies 4294670387 (age 151.072s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000000759fb79>] amd_uncore_cpu_up_prepare+0xaf/0x230 [<00000000ddc9e126>] cpuhp_invoke_callback+0x2cf/0x470 [<0000000093e727d4>] cpuhp_issue_call+0x14d/0x170 [<0000000045464d54>] __cpuhp_setup_state_cpuslocked+0x11e/0x330 [<0000000069f67cbd>] __cpuhp_setup_state+0x6b/0x110 [<0000000015365e0f>] amd_uncore_init+0x260/0x321 [<00000000089152d2>] do_one_initcall+0x3f/0x1f0 [<000000002d0bd18d>] kernel_init_freeable+0x1ca/0x212 [<0000000030be8dde>] kernel_init+0x11/0x120 [<0000000059709e59>] ret_from_fork+0x22/0x30 unreferenced object 0xffff8c5944b8dd40 (size 64): comm "swapper/0", pid 1, jiffies 4294670387 (age 151.072s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000306efe8b>] amd_uncore_cpu_up_prepare+0x183/0x230 [<00000000ddc9e126>] cpuhp_invoke_callback+0x2cf/0x470 [<0000000093e727d4>] cpuhp_issue_call+0x14d/0x170 [<0000000045464d54>] __cpuhp_setup_state_cpuslocked+0x11e/0x330 [<0000000069f67cbd>] __cpuhp_setup_state+0x6b/0x110 [<0000000015365e0f>] amd_uncore_init+0x260/0x321 [<00000000089152d2>] do_one_initcall+0x3f/0x1f0 [<000000002d0bd18d>] kernel_init_freeable+0x1ca/0x212 [<0000000030be8dde>] kernel_init+0x11/0x120 [<0000000059709e59>] ret_from_fork+0x22/0x30 [...] Fix the problem by freeing the events array before freeing the uncore context. Fixes: 39621c5808f5 ("perf/x86/amd/uncore: Use dynamic events array") Reported-by: Ravi Bangoria Signed-off-by: Sandipan Das Signed-off-by: Borislav Petkov Tested-by: Ravi Bangoria Cc: Link: https://lore.kernel.org/r/4fa9e5ac6d6e41fa889101e7af7e6ba372cfea52.1662613255.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index d568afc705d2..83f15fe411b3 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -553,6 +553,7 @@ static void uncore_clean_online(void) hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { hlist_del(&uncore->node); + kfree(uncore->events); kfree(uncore); } } From a931237cbea256aff13bb403da13a97b2d1605d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:49 +0200 Subject: [PATCH 135/963] serial: 8250: Fall back to non-DMA Rx if IIR_RDI occurs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DW UART sometimes triggers IIR_RDI during DMA Rx when IIR_RX_TIMEOUT should have been triggered instead. Since IIR_RDI has higher priority than IIR_RX_TIMEOUT, this causes the Rx to hang into interrupt loop. The problem seems to occur at least with some combinations of small-sized transfers (I've reproduced the problem on Elkhart Lake PSE UARTs). If there's already an on-going Rx DMA and IIR_RDI triggers, fall graciously back to non-DMA Rx. That is, behave as if IIR_RX_TIMEOUT had occurred. 8250_omap already considers IIR_RDI similar to this change so its nothing unheard of. Fixes: 75df022b5f89 ("serial: 8250_dma: Fix RX handling") Cc: Co-developed-by: Srikanth Thokala Signed-off-by: Srikanth Thokala Co-developed-by: Aman Kumar Signed-off-by: Aman Kumar Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index fe8662cd9402..92dd18716169 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1897,6 +1897,10 @@ EXPORT_SYMBOL_GPL(serial8250_modem_status); static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) { switch (iir & 0x3f) { + case UART_IIR_RDI: + if (!up->dma->rx_running) + break; + fallthrough; case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); fallthrough; From 1bfcbe5805d0cfc83c3544dcd01e0a282c1f6790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:50 +0200 Subject: [PATCH 136/963] serial: 8250_lpss: Configure DMA also w/o DMA filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the platform doesn't use DMA device filter (as is the case with Elkhart Lake), whole lpss8250_dma_setup() setup is skipped. This results in skipping also *_maxburst setup which is undesirable. Refactor lpss8250_dma_setup() to configure DMA even if filter is not setup. Cc: stable Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_lpss.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 44cc755b1a29..7d9cddbfef40 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -277,8 +277,13 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port struct dw_dma_slave *rx_param, *tx_param; struct device *dev = port->port.dev; - if (!lpss->dma_param.dma_dev) + if (!lpss->dma_param.dma_dev) { + dma = port->dma; + if (dma) + goto out_configuration_only; + return 0; + } rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL); if (!rx_param) @@ -289,16 +294,18 @@ static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port return -ENOMEM; *rx_param = lpss->dma_param; - dma->rxconf.src_maxburst = lpss->dma_maxburst; - *tx_param = lpss->dma_param; - dma->txconf.dst_maxburst = lpss->dma_maxburst; dma->fn = lpss8250_dma_filter; dma->rx_param = rx_param; dma->tx_param = tx_param; port->dma = dma; + +out_configuration_only: + dma->rxconf.src_maxburst = lpss->dma_maxburst; + dma->txconf.dst_maxburst = lpss->dma_maxburst; + return 0; } From 7090abd6ad0610a144523ce4ffcb8560909bf2a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:51 +0200 Subject: [PATCH 137/963] serial: 8250_lpss: Use 16B DMA burst with Elkhart Lake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Configure DMA to use 16B burst size with Elkhart Lake. This makes the bus use more efficient and works around an issue which occurs with the previously used 1B. The fix was initially developed by Srikanth Thokala and Aman Kumar. This together with the previous config change is the cleaned up version of the original fix. Fixes: 0a9410b981e9 ("serial: 8250_lpss: Enable DMA on Intel Elkhart Lake") Cc: # serial: 8250_lpss: Configure DMA also w/o DMA filter Reported-by: Wentong Wu Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-4-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_lpss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 7d9cddbfef40..0e43bdfb7459 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -174,6 +174,8 @@ static int ehl_serial_setup(struct lpss8250 *lpss, struct uart_port *port) */ up->dma = dma; + lpss->dma_maxburst = 16; + port->set_termios = dw8250_do_set_termios; return 0; From 1980860e0c8299316cddaf0992dd9e1258ec9d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 8 Nov 2022 14:19:52 +0200 Subject: [PATCH 138/963] serial: 8250: Flush DMA Rx on RLSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning true from handle_rx_dma() without flushing DMA first creates a data ordering hazard. If DMA Rx has handled any character at the point when RLSI occurs, the non-DMA path handles any pending characters jumping them ahead of those characters that are pending under DMA. Fixes: 75df022b5f89 ("serial: 8250_dma: Fix RX handling") Cc: Signed-off-by: Ilpo Järvinen Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221108121952.5497-5-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 92dd18716169..388172289627 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1901,10 +1901,9 @@ static bool handle_rx_dma(struct uart_8250_port *up, unsigned int iir) if (!up->dma->rx_running) break; fallthrough; + case UART_IIR_RLSI: case UART_IIR_RX_TIMEOUT: serial8250_rx_dma_flush(up); - fallthrough; - case UART_IIR_RLSI: return true; } return up->dma->rx_dma(up); From 181135bb20dcb184edd89817831b888eb8132741 Mon Sep 17 00:00:00 2001 From: Nicolas Dumazet Date: Wed, 9 Nov 2022 13:29:46 +0100 Subject: [PATCH 139/963] usb: add NO_LPM quirk for Realforce 87U Keyboard Before adding this quirk, this (mechanical keyboard) device would not be recognized, logging: new full-speed USB device number 56 using xhci_hcd unable to read config index 0 descriptor/start: -32 chopping to 0 config(s) It would take dozens of plugging/unpuggling cycles for the keyboard to be recognized. Keyboard seems to simply work after applying this quirk. This issue had been reported by users in two places already ([1], [2]) but nobody tried upstreaming a patch yet. After testing I believe their suggested fix (DELAY_INIT + NO_LPM + DEVICE_QUALIFIER) was probably a little overkill. I assume this particular combination was tested because it had been previously suggested in [3], but only NO_LPM seems sufficient for this device. [1]: https://qiita.com/float168/items/fed43d540c8e2201b543 [2]: https://blog.kostic.dev/posts/making-the-realforce-87ub-work-with-usb30-on-Ubuntu/ [3]: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1678477 Cc: stable@vger.kernel.org Signed-off-by: Nicolas Dumazet Link: https://lore.kernel.org/r/20221109122946.706036-1-ndumazet@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 0722d2131305..079e183cf3bf 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -362,6 +362,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM }, { USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM }, + /* Realforce 87U Keyboard */ + { USB_DEVICE(0x0853, 0x011b), .driver_info = USB_QUIRK_NO_LPM }, + /* M-Systems Flash Disk Pioneers */ { USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME }, From 3ec17cb325ac731c2211e13f7eaa4b812694e218 Mon Sep 17 00:00:00 2001 From: Akira Yokosawa Date: Wed, 2 Nov 2022 20:48:01 +0900 Subject: [PATCH 140/963] docs/driver-api/miscellaneous: Remove kernel-doc of serial_core.c Since merge of tty-6.0-rc1, "make htmldocs" with Sphinx >=3.1 emits a bunch of warnings indicating duplicate kernel-doc comments from drivers/tty/serial/serial_core.c. This is due to the kernel-doc directive for serial_core.c in serial/drivers.rst added in the merge. It conflicts with an existing kernel-doc directive in miscellaneous.rst. Remove the latter directive and resolve the duplicates. Signed-off-by: Akira Yokosawa Fixes: 607ca0f742b7 ("Merge tag 'tty-6.0-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty") Cc: stable@vger.kernel.org # 6.0 Cc: Jiri Slaby Cc: Greg Kroah-Hartman Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/4e54c76a-138a-07e0-985a-dd83cb622208@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/miscellaneous.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/miscellaneous.rst b/Documentation/driver-api/miscellaneous.rst index 304ffb146cf9..4a5104a368ac 100644 --- a/Documentation/driver-api/miscellaneous.rst +++ b/Documentation/driver-api/miscellaneous.rst @@ -16,12 +16,11 @@ Parallel Port Devices 16x50 UART Driver ================= -.. kernel-doc:: drivers/tty/serial/serial_core.c - :export: - .. kernel-doc:: drivers/tty/serial/8250/8250_core.c :export: +See serial/driver.rst for related APIs. + Pulse-Width Modulation (PWM) ============================ From 0fc801f8018000c8e64a275a20cb1da7c54e46df Mon Sep 17 00:00:00 2001 From: Mushahid Hussain Date: Mon, 10 Oct 2022 21:57:20 +0500 Subject: [PATCH 141/963] speakup: fix a segfault caused by switching consoles This patch fixes a segfault by adding a null check on synth in speakup_con_update(). The segfault can be reproduced as follows: - Login into a text console - Load speakup and speakup_soft modules - Remove speakup_soft - Switch to a graphics console This is caused by lack of a null check on `synth` in speakup_con_update(). Here's the sequence that causes the segfault: - When we remove the speakup_soft, synth_release() sets the synth to null. - After that, when we change the virtual console to graphics console, vt_notifier_call() is fired, which then calls speakup_con_update(). - Inside speakup_con_update() there's no null check on synth, so it calls synth_printf(). - Inside synth_printf(), synth_buffer_add() and synth_start(), both access synth, when it is null and causing a segfault. Therefore adding a null check on synth solves the issue. Fixes: 2610df41489f ("staging: speakup: Add pause command used on switching to graphical mode") Cc: stable Signed-off-by: Mushahid Hussain Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20221010165720.397042-1-mushi.shar@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index f52265293482..73db0cb44fc7 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -1778,7 +1778,7 @@ static void speakup_con_update(struct vc_data *vc) { unsigned long flags; - if (!speakup_console[vc->vc_num] || spk_parked) + if (!speakup_console[vc->vc_num] || spk_parked || !synth) return; if (!spin_trylock_irqsave(&speakup_info.spinlock, flags)) /* Speakup output, discard */ From 92ca969ff8815f3feef2645199bd39bf594e5eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90o=C3=A0n=20Tr=E1=BA=A7n=20C=C3=B4ng=20Danh?= Date: Mon, 17 Oct 2022 15:09:36 +0700 Subject: [PATCH 142/963] speakup: replace utils' u_char with unsigned char MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/accessibility/speakup/utils.h will be used to compile host tool to generate metadata. "u_char" is a non-standard type, which is defined to "unsigned char" on glibc but not defined by some libc, e.g. musl. Let's replace "u_char" with "unsigned char" Signed-off-by: Đoàn Trần Công Danh Reviewed-by: Samuel Thibault Cc: stable Link: https://lore.kernel.org/r/b75743026aaee2d81efe3d7f2e8fa47f7d0b8ea7.1665736571.git.congdanhqx@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accessibility/speakup/utils.h b/drivers/accessibility/speakup/utils.h index 4bf2ee8ac246..4ce9a12f7664 100644 --- a/drivers/accessibility/speakup/utils.h +++ b/drivers/accessibility/speakup/utils.h @@ -54,7 +54,7 @@ static inline int oops(const char *msg, const char *info) static inline struct st_key *hash_name(char *name) { - u_char *pn = (u_char *)name; + unsigned char *pn = (unsigned char *)name; int hash = 0; while (*pn) { From e5b0d06d9b10f5f43101bd6598b076c347f9295f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 4 Nov 2022 18:58:49 +0100 Subject: [PATCH 143/963] misc/vmw_vmci: fix an infoleak in vmci_host_do_receive_datagram() `struct vmci_event_qp` allocated by qp_notify_peer() contains padding, which may carry uninitialized data to the userspace, as observed by KMSAN: BUG: KMSAN: kernel-infoleak in instrument_copy_to_user ./include/linux/instrumented.h:121 instrument_copy_to_user ./include/linux/instrumented.h:121 _copy_to_user+0x5f/0xb0 lib/usercopy.c:33 copy_to_user ./include/linux/uaccess.h:169 vmci_host_do_receive_datagram drivers/misc/vmw_vmci/vmci_host.c:431 vmci_host_unlocked_ioctl+0x33d/0x43d0 drivers/misc/vmw_vmci/vmci_host.c:925 vfs_ioctl fs/ioctl.c:51 ... Uninit was stored to memory at: kmemdup+0x74/0xb0 mm/util.c:131 dg_dispatch_as_host drivers/misc/vmw_vmci/vmci_datagram.c:271 vmci_datagram_dispatch+0x4f8/0xfc0 drivers/misc/vmw_vmci/vmci_datagram.c:339 qp_notify_peer+0x19a/0x290 drivers/misc/vmw_vmci/vmci_queue_pair.c:1479 qp_broker_attach drivers/misc/vmw_vmci/vmci_queue_pair.c:1662 qp_broker_alloc+0x2977/0x2f30 drivers/misc/vmw_vmci/vmci_queue_pair.c:1750 vmci_qp_broker_alloc+0x96/0xd0 drivers/misc/vmw_vmci/vmci_queue_pair.c:1940 vmci_host_do_alloc_queuepair drivers/misc/vmw_vmci/vmci_host.c:488 vmci_host_unlocked_ioctl+0x24fd/0x43d0 drivers/misc/vmw_vmci/vmci_host.c:927 ... Local variable ev created at: qp_notify_peer+0x54/0x290 drivers/misc/vmw_vmci/vmci_queue_pair.c:1456 qp_broker_attach drivers/misc/vmw_vmci/vmci_queue_pair.c:1662 qp_broker_alloc+0x2977/0x2f30 drivers/misc/vmw_vmci/vmci_queue_pair.c:1750 Bytes 28-31 of 48 are uninitialized Memory access of size 48 starts at ffff888035155e00 Data copied to user address 0000000020000100 Use memset() to prevent the infoleaks. Also speculatively fix qp_notify_peer_local(), which may suffer from the same problem. Reported-by: syzbot+39be4da489ed2493ba25@syzkaller.appspotmail.com Cc: stable Fixes: 06164d2b72aa ("VMCI: queue pairs implementation.") Signed-off-by: Alexander Potapenko Reviewed-by: Vishnu Dasa Link: https://lore.kernel.org/r/20221104175849.2782567-1-glider@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index e71068f7759b..844264e1b88c 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -854,6 +854,7 @@ static int qp_notify_peer_local(bool attach, struct vmci_handle handle) u32 context_id = vmci_get_context_id(); struct vmci_event_qp ev; + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); @@ -1467,6 +1468,7 @@ static int qp_notify_peer(bool attach, * kernel. */ + memset(&ev, 0, sizeof(ev)); ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER); ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID, VMCI_CONTEXT_RESOURCE_ID); From 6e63153db50059fb78b8a8447b132664887d24e3 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 4 Nov 2022 10:13:34 +0800 Subject: [PATCH 144/963] siox: fix possible memory leak in siox_device_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If device_register() returns error in siox_device_add(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and sdevice is freed in siox_device_release(), set it to null in error path. Fixes: bbecb07fa0af ("siox: new driver framework for eckelmann SIOX") Signed-off-by: Yang Yingliang Reviewed-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221104021334.618189-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/siox/siox-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/siox/siox-core.c b/drivers/siox/siox-core.c index 7c4f32d76966..561408583b2b 100644 --- a/drivers/siox/siox-core.c +++ b/drivers/siox/siox-core.c @@ -839,6 +839,8 @@ static struct siox_device *siox_device_add(struct siox_master *smaster, err_device_register: /* don't care to make the buffer smaller again */ + put_device(&sdevice->dev); + sdevice = NULL; err_buf_alloc: siox_master_unlock(smaster); From ab126f51c93a15093df604f661c9480854c005a3 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 23 Sep 2022 19:52:08 +0100 Subject: [PATCH 145/963] parport_pc: Avoid FIFO port location truncation Match the data type of a temporary holding a reference to the FIFO port with the type of the original reference coming from `struct parport', avoiding data truncation with LP64 ports such as SPARC64 that refer to PCI port I/O locations via their corresponding MMIO addresses and will therefore have non-zero bits in the high 32-bit part of the reference. And in any case it is cleaner to have the data types matching here. Signed-off-by: Maciej W. Rozycki Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://lore.kernel.org/linux-pci/20220419033752.GA1101844@bhelgaas/ Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209231912550.29493@angie.orcam.me.uk Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 7c45927e2131..5784dc20fb38 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -468,7 +468,7 @@ static size_t parport_pc_fifo_write_block_pio(struct parport *port, const unsigned char *bufp = buf; size_t left = length; unsigned long expire = jiffies + port->physport->cad->timeout; - const int fifo = FIFO(port); + const unsigned long fifo = FIFO(port); int poll_for = 8; /* 80 usecs */ const struct parport_pc_private *priv = port->physport->private_data; const int fifo_depth = priv->fifo_depth; From 3ce00bb7e91cf57d723905371507af57182c37ef Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 4 Nov 2022 23:12:35 +0000 Subject: [PATCH 146/963] binder: validate alloc->mm in ->mmap() handler Since commit 1da52815d5f1 ("binder: fix alloc->vma_vm_mm null-ptr dereference") binder caches a pointer to the current->mm during open(). This fixes a null-ptr dereference reported by syzkaller. Unfortunately, it also opens the door for a process to update its mm after the open(), (e.g. via execve) making the cached alloc->mm pointer invalid. Things get worse when the process continues to mmap() a vma. From this point forward, binder will attempt to find this vma using an obsolete alloc->mm reference. Such as in binder_update_page_range(), where the wrong vma is obtained via vma_lookup(), yet binder proceeds to happily insert new pages into it. To avoid this issue fail the ->mmap() callback if we detect a mismatch between the vma->vm_mm and the original alloc->mm pointer. This prevents alloc->vm_addr from getting set, so that any subsequent vma_lookup() calls fail as expected. Fixes: 1da52815d5f1 ("binder: fix alloc->vma_vm_mm null-ptr dereference") Reported-by: Jann Horn Cc: # 5.15+ Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20221104231235.348958-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 1c39cfce32fa..4ad42b0f75cd 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -739,6 +739,12 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, const char *failure_string; struct binder_buffer *buffer; + if (unlikely(vma->vm_mm != alloc->mm)) { + ret = -EINVAL; + failure_string = "invalid vma->vm_mm"; + goto err_invalid_mm; + } + mutex_lock(&binder_alloc_mmap_lock); if (alloc->buffer_size) { ret = -EBUSY; @@ -785,6 +791,7 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc, alloc->buffer_size = 0; err_already_mapped: mutex_unlock(&binder_alloc_mmap_lock); +err_invalid_mm: binder_alloc_debug(BINDER_DEBUG_USER_ERROR, "%s: %d %lx-%lx %s failed %d\n", __func__, alloc->pid, vma->vm_start, vma->vm_end, From 7d945b046be3d2605dbb1806e73095aadd7ae129 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Wed, 9 Nov 2022 18:08:49 +0100 Subject: [PATCH 147/963] ASoC: stm32: dfsdm: manage cb buffers cleanup Ensure that resources allocated by iio_channel_get_all_cb() are released on driver unbind. Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20221109170849.273719-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_adfsdm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/stm/stm32_adfsdm.c b/sound/soc/stm/stm32_adfsdm.c index 643fc8a17018..837c1848d9bf 100644 --- a/sound/soc/stm/stm32_adfsdm.c +++ b/sound/soc/stm/stm32_adfsdm.c @@ -304,6 +304,11 @@ static int stm32_adfsdm_dummy_cb(const void *data, void *private) return 0; } +static void stm32_adfsdm_cleanup(void *data) +{ + iio_channel_release_all_cb(data); +} + static struct snd_soc_component_driver stm32_adfsdm_soc_platform = { .open = stm32_adfsdm_pcm_open, .close = stm32_adfsdm_pcm_close, @@ -350,6 +355,12 @@ static int stm32_adfsdm_probe(struct platform_device *pdev) if (IS_ERR(priv->iio_cb)) return PTR_ERR(priv->iio_cb); + ret = devm_add_action_or_reset(&pdev->dev, stm32_adfsdm_cleanup, priv->iio_cb); + if (ret < 0) { + dev_err(&pdev->dev, "Unable to add action\n"); + return ret; + } + component = devm_kzalloc(&pdev->dev, sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; From 3ca507bf99611c82dafced73e921c1b10ee12869 Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 9 Nov 2022 20:13:54 +0800 Subject: [PATCH 148/963] ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate correct frequency of LRCLK and BCLK. Sometimes the read-only value can't be updated timely after enabling SYSCLK. This results in wrong calculation values. Delay is introduced here to wait for newest value from register. The time of the delay should be at least 500~1000us according to test. Signed-off-by: Chancel Liu Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221109121354.123958-1-chancel.liu@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm8962.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index b4b4355c6728..b901e4c65e8a 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2503,6 +2503,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) snd_soc_component_update_bits(component, WM8962_CLOCKING2, WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate + * correct frequency of LRCLK and BCLK. Sometimes the read-only value + * can't be updated timely after enabling SYSCLK. This results in wrong + * calculation values. Delay is introduced here to wait for newest + * value from register. The time of the delay should be at least + * 500~1000us according to test. + */ + usleep_range(500, 1000); dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) From 0811664da064c6d7ca64c02f5579f758a007e52d Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 8 Nov 2022 20:19:45 +0800 Subject: [PATCH 149/963] selftests/bpf: Fix casting error when cross-compiling test_verifier for 32-bit platforms When cross-compiling test_verifier for 32-bit platforms, the casting error is shown below: test_verifier.c:1263:27: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] 1263 | info.xlated_prog_insns = (__u64)*buf; | ^ cc1: all warnings being treated as errors Fix it by adding zero-extension for it. Fixes: 933ff53191eb ("selftests/bpf: specify expected instructions in test_verifier tests") Signed-off-by: Pu Lehui Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108121945.4104644-1-pulehui@huaweicloud.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 2dbcbf363c18..b605a70d4f6b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1260,7 +1260,7 @@ static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)*buf; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { perror("second bpf_obj_get_info_by_fd failed"); goto out_free_buf; From 5704bc7e8991164b14efb748b5afa0715c25fac3 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 8 Nov 2022 09:58:57 +0800 Subject: [PATCH 150/963] selftests/bpf: Fix test_progs compilation failure in 32-bit arch test_progs fails to be compiled in the 32-bit arch, log is as follows: test_progs.c:1013:52: error: format '%ld' expects argument of type 'long int', but argument 3 has type 'size_t' {aka 'unsigned int'} [-Werror=format=] 1013 | sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", | ~~^ | | | long int | %d 1014 | strlen(msg->test_log.log_buf), | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | | | size_t {aka unsigned int} Fix it. Fixes: 91b2c0afd00c ("selftests/bpf: Add parallelism to test_progs") Signed-off-by: Yang Jihong Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221108015857.132457-1-yangjihong1@huawei.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/test_progs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0e9a47f97890..3fef451d8831 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1010,7 +1010,7 @@ static inline const char *str_msg(const struct msg *msg, char *buf) msg->subtest_done.have_log); break; case MSG_TEST_LOG: - sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", + sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)", strlen(msg->test_log.log_buf), msg->test_log.is_last); break; From 39bfcb8138f6dc3375f23b1e62ccfc7c0d83295d Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Fri, 21 Oct 2022 10:31:53 +0800 Subject: [PATCH 151/963] erofs: fix use-after-free of fsid and domain_id string When erofs instance is remounted with fsid or domain_id mount option specified, the original fsid and domain_id string pointer in sbi->opt is directly overridden with the fsid and domain_id string in the new fs_context, without freeing the original fsid and domain_id string. What's worse, when the new fsid and domain_id string is transferred to sbi, they are not reset to NULL in fs_context, and thus they are freed when remount finishes, while sbi is still referring to these strings. Reconfiguration for fsid and domain_id seems unusual. Thus clarify this restriction explicitly and dump a warning when users are attempting to do this. Besides, to fix the use-after-free issue, move fsid and domain_id from erofs_mount_opts to outside. Fixes: c6be2bd0a5dd ("erofs: register fscache volume") Fixes: 8b7adf1dff3d ("erofs: introduce fscache-based domain") Signed-off-by: Jingbo Xu Reviewed-by: Jia Zhu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20221021023153.1330-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 14 +++++++------- fs/erofs/internal.h | 6 ++++-- fs/erofs/super.c | 39 ++++++++++++++++++++++----------------- fs/erofs/sysfs.c | 8 ++++---- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 260fa4737fc0..6eaf4a4ab95c 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -406,13 +406,13 @@ static void erofs_fscache_domain_put(struct erofs_domain *domain) static int erofs_fscache_register_volume(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); - char *domain_id = sbi->opt.domain_id; + char *domain_id = sbi->domain_id; struct fscache_volume *volume; char *name; int ret = 0; name = kasprintf(GFP_KERNEL, "erofs,%s", - domain_id ? domain_id : sbi->opt.fsid); + domain_id ? domain_id : sbi->fsid); if (!name) return -ENOMEM; @@ -438,7 +438,7 @@ static int erofs_fscache_init_domain(struct super_block *sb) if (!domain) return -ENOMEM; - domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL); + domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL); if (!domain->domain_id) { kfree(domain); return -ENOMEM; @@ -475,7 +475,7 @@ static int erofs_fscache_register_domain(struct super_block *sb) mutex_lock(&erofs_domain_list_lock); list_for_each_entry(domain, &erofs_domain_list, list) { - if (!strcmp(domain->domain_id, sbi->opt.domain_id)) { + if (!strcmp(domain->domain_id, sbi->domain_id)) { sbi->domain = domain; sbi->volume = domain->volume; refcount_inc(&domain->ref); @@ -612,7 +612,7 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb, struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, bool need_inode) { - if (EROFS_SB(sb)->opt.domain_id) + if (EROFS_SB(sb)->domain_id) return erofs_domain_register_cookie(sb, name, need_inode); return erofs_fscache_acquire_cookie(sb, name, need_inode); } @@ -644,7 +644,7 @@ int erofs_fscache_register_fs(struct super_block *sb) struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_fscache *fscache; - if (sbi->opt.domain_id) + if (sbi->domain_id) ret = erofs_fscache_register_domain(sb); else ret = erofs_fscache_register_volume(sb); @@ -652,7 +652,7 @@ int erofs_fscache_register_fs(struct super_block *sb) return ret; /* acquired domain/volume will be relinquished in kill_sb() on error */ - fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true); + fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true); if (IS_ERR(fscache)) return PTR_ERR(fscache); diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 1701df48c446..05dc68627722 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -75,8 +75,6 @@ struct erofs_mount_opts { unsigned int max_sync_decompress_pages; #endif unsigned int mount_opt; - char *fsid; - char *domain_id; }; struct erofs_dev_context { @@ -89,6 +87,8 @@ struct erofs_dev_context { struct erofs_fs_context { struct erofs_mount_opts opt; struct erofs_dev_context *devs; + char *fsid; + char *domain_id; }; /* all filesystem-wide lz4 configurations */ @@ -170,6 +170,8 @@ struct erofs_sb_info { struct fscache_volume *volume; struct erofs_fscache *s_fscache; struct erofs_domain *domain; + char *fsid; + char *domain_id; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 2cf96ce1c32e..1c7dcca702b3 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -579,9 +579,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; case Opt_fsid: #ifdef CONFIG_EROFS_FS_ONDEMAND - kfree(ctx->opt.fsid); - ctx->opt.fsid = kstrdup(param->string, GFP_KERNEL); - if (!ctx->opt.fsid) + kfree(ctx->fsid); + ctx->fsid = kstrdup(param->string, GFP_KERNEL); + if (!ctx->fsid) return -ENOMEM; #else errorfc(fc, "fsid option not supported"); @@ -589,9 +589,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, break; case Opt_domain_id: #ifdef CONFIG_EROFS_FS_ONDEMAND - kfree(ctx->opt.domain_id); - ctx->opt.domain_id = kstrdup(param->string, GFP_KERNEL); - if (!ctx->opt.domain_id) + kfree(ctx->domain_id); + ctx->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!ctx->domain_id) return -ENOMEM; #else errorfc(fc, "domain_id option not supported"); @@ -728,10 +728,12 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_fs_info = sbi; sbi->opt = ctx->opt; - ctx->opt.fsid = NULL; - ctx->opt.domain_id = NULL; sbi->devs = ctx->devs; ctx->devs = NULL; + sbi->fsid = ctx->fsid; + ctx->fsid = NULL; + sbi->domain_id = ctx->domain_id; + ctx->domain_id = NULL; if (erofs_is_fscache_mode(sb)) { sb->s_blocksize = EROFS_BLKSIZ; @@ -820,7 +822,7 @@ static int erofs_fc_get_tree(struct fs_context *fc) { struct erofs_fs_context *ctx = fc->fs_private; - if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->opt.fsid) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) return get_tree_nodev(fc, erofs_fc_fill_super); return get_tree_bdev(fc, erofs_fc_fill_super); @@ -834,6 +836,9 @@ static int erofs_fc_reconfigure(struct fs_context *fc) DBG_BUGON(!sb_rdonly(sb)); + if (ctx->fsid || ctx->domain_id) + erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); + if (test_opt(&ctx->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else @@ -873,8 +878,8 @@ static void erofs_fc_free(struct fs_context *fc) struct erofs_fs_context *ctx = fc->fs_private; erofs_free_dev_context(ctx->devs); - kfree(ctx->opt.fsid); - kfree(ctx->opt.domain_id); + kfree(ctx->fsid); + kfree(ctx->domain_id); kfree(ctx); } @@ -944,8 +949,8 @@ static void erofs_kill_sb(struct super_block *sb) erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); erofs_fscache_unregister_fs(sb); - kfree(sbi->opt.fsid); - kfree(sbi->opt.domain_id); + kfree(sbi->fsid); + kfree(sbi->domain_id); kfree(sbi); sb->s_fs_info = NULL; } @@ -1098,10 +1103,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(opt, DAX_NEVER)) seq_puts(seq, ",dax=never"); #ifdef CONFIG_EROFS_FS_ONDEMAND - if (opt->fsid) - seq_printf(seq, ",fsid=%s", opt->fsid); - if (opt->domain_id) - seq_printf(seq, ",domain_id=%s", opt->domain_id); + if (sbi->fsid) + seq_printf(seq, ",fsid=%s", sbi->fsid); + if (sbi->domain_id) + seq_printf(seq, ",domain_id=%s", sbi->domain_id); #endif return 0; } diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c index 783bb7b21b51..fd476961f742 100644 --- a/fs/erofs/sysfs.c +++ b/fs/erofs/sysfs.c @@ -210,14 +210,14 @@ int erofs_register_sysfs(struct super_block *sb) int err; if (erofs_is_fscache_mode(sb)) { - if (sbi->opt.domain_id) { - str = kasprintf(GFP_KERNEL, "%s,%s", sbi->opt.domain_id, - sbi->opt.fsid); + if (sbi->domain_id) { + str = kasprintf(GFP_KERNEL, "%s,%s", sbi->domain_id, + sbi->fsid); if (!str) return -ENOMEM; name = str; } else { - name = sbi->opt.fsid; + name = sbi->fsid; } } else { name = sb->s_id; From 0d10e90cee9eb57882b0f7e19fd699033722e226 Mon Sep 17 00:00:00 2001 From: Zhichao Liu Date: Thu, 10 Nov 2022 15:28:39 +0800 Subject: [PATCH 152/963] spi: mediatek: Fix DEVAPC Violation at KO Remove A DEVAPC violation occurs when removing the module due to accessing HW registers without base clock. To fix this bug, the correct method is: 1. Call the runtime resume function to enable the clock; 2. Operate the registers to reset the HW; 3. Turn off the clocks and disable the device RPM mechanism. Signed-off-by: Zhichao Liu Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221110072839.30961-1-zhichao.liu@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi-mt65xx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a33c9a3de395..d6aff909fc36 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -1273,8 +1273,11 @@ static int mtk_spi_remove(struct platform_device *pdev) { struct spi_master *master = platform_get_drvdata(pdev); struct mtk_spi *mdata = spi_master_get_devdata(master); + int ret; - pm_runtime_disable(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) + return ret; mtk_spi_reset(mdata); @@ -1283,6 +1286,9 @@ static int mtk_spi_remove(struct platform_device *pdev) clk_unprepare(mdata->spi_hclk); } + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } From 91d5c5060ee24fe8da88cd585bb43b843d2f0dce Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 10 Nov 2022 16:20:56 +0800 Subject: [PATCH 153/963] pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map Here is the BUG report by KASAN about null pointer dereference: BUG: KASAN: null-ptr-deref in strcmp+0x2e/0x50 Read of size 1 at addr 0000000000000000 by task python3/2640 Call Trace: strcmp __of_find_property of_find_property pinctrl_dt_to_map kasprintf() would return NULL pointer when kmalloc() fail to allocate. So directly return ENOMEM, if kasprintf() return NULL pointer. Fixes: 57291ce295c0 ("pinctrl: core device tree mapping table parsing support") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20221110082056.2014898-1-zengheng4@huawei.com Signed-off-by: Linus Walleij --- drivers/pinctrl/devicetree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index ef898ee8ca6b..6e0a40962f38 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -220,6 +220,8 @@ int pinctrl_dt_to_map(struct pinctrl *p, struct pinctrl_dev *pctldev) for (state = 0; ; state++) { /* Retrieve the pinctrl-* property */ propname = kasprintf(GFP_KERNEL, "pinctrl-%d", state); + if (!propname) + return -ENOMEM; prop = of_find_property(np, propname, &size); kfree(propname); if (!prop) { From dba9e3467425800f9d3a14e8b6a0f85c731c1650 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 10 Nov 2022 17:44:45 +0800 Subject: [PATCH 154/963] drm/vc4: kms: Fix IS_ERR() vs NULL check for vc4_kms The drm_atomic_get_new_private_obj_state() function returns NULL on error path, drm_atomic_get_old_private_obj_state() function returns NULL on error path, too, they does not return error pointers. By the way, vc4_hvs_get_new/old_global_state() should return ERR_PTR(-EINVAL), otherwise there will be null-ptr-defer issue, such as follows: In function vc4_atomic_commit_tail(): |-- old_hvs_state = vc4_hvs_get_old_global_state(state); <-- return NULL |-- if (WARN_ON(IS_ERR(old_hvs_state))) <-- no return |-- unsigned long state_rate = max(old_hvs_state->core_clock_rate, new_hvs_state->core_clock_rate); <-- null-ptr-defer Fixes: 9ec03d7f1ed3 ("drm/vc4: kms: Wait on previous FIFO users before a commit") Signed-off-by: Gaosheng Cui Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20221110094445.2930509-6-cuigaosheng1@huawei.com --- drivers/gpu/drm/vc4/vc4_kms.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 4419e810103d..0a6347c05df4 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -197,8 +197,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } @@ -210,8 +210,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state) struct drm_private_state *priv_state; priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels); - if (IS_ERR(priv_state)) - return ERR_CAST(priv_state); + if (!priv_state) + return ERR_PTR(-EINVAL); return to_vc4_hvs_state(priv_state); } From 876153ab068b2507a19aa3ef481f5b00a2cc780f Mon Sep 17 00:00:00 2001 From: Aishwarya Kothari Date: Wed, 31 Aug 2022 16:16:22 +0200 Subject: [PATCH 155/963] drm/panel: simple: set bpc field for logic technologies displays In case bpc is not set for a panel it then throws a WARN(). Add bpc to the panels logictechno_lt170410_2whc and logictechno_lt161010_2nh. Fixes: 5728fe7fa539 ("drm/panel: simple: add display timings for logic technologies displays") Signed-off-by: Aishwarya Kothari Signed-off-by: Francesco Dolcini Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20220831141622.39605-1-francesco.dolcini@toradex.com --- drivers/gpu/drm/panel/panel-simple.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 2944228a8e2c..8a3b685c2fcc 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2500,6 +2500,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = { static const struct panel_desc logictechno_lt161010_2nh = { .timings = &logictechno_lt161010_2nh_timing, .num_timings = 1, + .bpc = 6, .size = { .width = 154, .height = 86, @@ -2529,6 +2530,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = { static const struct panel_desc logictechno_lt170410_2whc = { .timings = &logictechno_lt170410_2whc_timing, .num_timings = 1, + .bpc = 8, .size = { .width = 217, .height = 136, From 36b038791e1e2baea892e9276588815fd14894b4 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 10 Nov 2022 12:44:00 +0000 Subject: [PATCH 156/963] x86/fpu: Drop fpregs lock before inheriting FPU permissions Mike Galbraith reported the following against an old fork of preempt-rt but the same issue also applies to the current preempt-rt tree. BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: systemd preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 Preemption disabled at: fpu_clone CPU: 6 PID: 1 Comm: systemd Tainted: G E (unreleased) Call Trace: dump_stack_lvl ? fpu_clone __might_resched rt_spin_lock fpu_clone ? copy_thread ? copy_process ? shmem_alloc_inode ? kmem_cache_alloc ? kernel_clone ? __do_sys_clone ? do_syscall_64 ? __x64_sys_rt_sigprocmask ? syscall_exit_to_user_mode ? do_syscall_64 ? syscall_exit_to_user_mode ? do_syscall_64 ? syscall_exit_to_user_mode ? do_syscall_64 ? exc_page_fault ? entry_SYSCALL_64_after_hwframe Mike says: The splat comes from fpu_inherit_perms() being called under fpregs_lock(), and us reaching the spin_lock_irq() therein due to fpu_state_size_dynamic() returning true despite static key __fpu_state_size_dynamic having never been enabled. Mike's assessment looks correct. fpregs_lock on a PREEMPT_RT kernel disables preemption so calling spin_lock_irq() in fpu_inherit_perms() is unsafe. This problem exists since commit 9e798e9aa14c ("x86/fpu: Prepare fpu_clone() for dynamically enabled features"). Even though the original bug report should not have enabled the paths at all, the bug still exists. fpregs_lock is necessary when editing the FPU registers or a task's FP state but it is not necessary for fpu_inherit_perms(). The only write of any FP state in fpu_inherit_perms() is for the new child which is not running yet and cannot context switch or be borrowed by a kernel thread yet. Hence, fpregs_lock is not protecting anything in the new child until clone() completes and can be dropped earlier. The siglock still needs to be acquired by fpu_inherit_perms() as the read of the parent's permissions has to be serialised. [ bp: Cleanup splat. ] Fixes: 9e798e9aa14c ("x86/fpu: Prepare fpu_clone() for dynamically enabled features") Reported-by: Mike Galbraith Signed-off-by: Mel Gorman Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Cc: Link: https://lore.kernel.org/r/20221110124400.zgymc2lnwqjukgfh@techsingularity.net --- arch/x86/kernel/fpu/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3b28c5b25e12..d00db56a8868 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -605,9 +605,9 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal) if (test_thread_flag(TIF_NEED_FPU_LOAD)) fpregs_restore_userregs(); save_fpregs_to_fpstate(dst_fpu); + fpregs_unlock(); if (!(clone_flags & CLONE_THREAD)) fpu_inherit_perms(dst_fpu); - fpregs_unlock(); /* * Children never inherit PASID state. From 1aeb122d214b92474c86fde00a03d6e2d69381b5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 28 Sep 2022 21:51:12 +0200 Subject: [PATCH 157/963] nvmem: lan9662-otp: Fix compatible string The device tree bindings for lan9662-otp expects the compatible string to be one of following compatible strings: microchip,lan9662-otpc microchip,lan9668-otpc The problem is that the lan9662-otp driver contains the microchip,lan9662-otp compatible string instead of microchip,lan9662-otpc. Fix this by updating the compatible string in the driver. Fixes: 9e8f208ad5229d ("nvmem: lan9662-otp: add support") Signed-off-by: Horatiu Vultur Link: https://lore.kernel.org/r/20220928195112.630351-1-horatiu.vultur@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/lan9662-otpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/lan9662-otpc.c b/drivers/nvmem/lan9662-otpc.c index f6732fd216d8..377bf34c2946 100644 --- a/drivers/nvmem/lan9662-otpc.c +++ b/drivers/nvmem/lan9662-otpc.c @@ -203,7 +203,7 @@ static int lan9662_otp_probe(struct platform_device *pdev) } static const struct of_device_id lan9662_otp_match[] = { - { .compatible = "microchip,lan9662-otp", }, + { .compatible = "microchip,lan9662-otpc", }, { }, }; MODULE_DEVICE_TABLE(of, lan9662_otp_match); From b9c1939627f8185dec8ba6d741e9573a4c7a5834 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 29 Sep 2022 18:52:02 +0200 Subject: [PATCH 158/963] slimbus: stream: correct presence rate frequencies Correct few frequencies in presence rate table - multiplied by 10 (110250 instead of 11025 Hz). Fixes: abb9c9b8b51b ("slimbus: stream: add stream support") Cc: Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20220929165202.410937-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/stream.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c index 75f87b3d8b95..73a2aa362957 100644 --- a/drivers/slimbus/stream.c +++ b/drivers/slimbus/stream.c @@ -67,10 +67,10 @@ static const int slim_presence_rate_table[] = { 384000, 768000, 0, /* Reserved */ - 110250, - 220500, - 441000, - 882000, + 11025, + 22050, + 44100, + 88200, 176400, 352800, 705600, From 5fddf8962b429b8303c4a654291ecb6e61a7d747 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 11 Oct 2022 11:14:17 -0600 Subject: [PATCH 159/963] docs: update mediator contact information in CoC doc Update mediator contact information in CoC interpretation document. Cc: Signed-off-by: Shuah Khan Link: https://lore.kernel.org/r/20221011171417.34286-1-skhan@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Documentation/process/code-of-conduct-interpretation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/process/code-of-conduct-interpretation.rst b/Documentation/process/code-of-conduct-interpretation.rst index 922e0b547bc3..66b07f14714c 100644 --- a/Documentation/process/code-of-conduct-interpretation.rst +++ b/Documentation/process/code-of-conduct-interpretation.rst @@ -51,7 +51,7 @@ the Technical Advisory Board (TAB) or other maintainers if you're uncertain how to handle situations that come up. It will not be considered a violation report unless you want it to be. If you are uncertain about approaching the TAB or any other maintainers, please -reach out to our conflict mediator, Joanna Lee . +reach out to our conflict mediator, Joanna Lee . In the end, "be kind to each other" is really what the end goal is for everybody. We know everyone is human and we all fail at times, but the From e54fad8044db18cc400df8d01bfb86cada08b7cb Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Thu, 27 Oct 2022 17:59:04 +0800 Subject: [PATCH 160/963] slimbus: qcom-ngd: Fix build error when CONFIG_SLIM_QCOM_NGD_CTRL=y && CONFIG_QCOM_RPROC_COMMON=m If CONFIG_SLIM_QCOM_NGD_CTRL=y, CONFIG_QCOM_RPROC_COMMON=m, COMPILE_TEST=y, bulding fails: drivers/slimbus/qcom-ngd-ctrl.o: In function `qcom_slim_ngd_ctrl_probe': qcom-ngd-ctrl.c:(.text+0x330): undefined reference to `qcom_register_ssr_notifier' qcom-ngd-ctrl.c:(.text+0x5fc): undefined reference to `qcom_unregister_ssr_notifier' drivers/slimbus/qcom-ngd-ctrl.o: In function `qcom_slim_ngd_remove': qcom-ngd-ctrl.c:(.text+0x90c): undefined reference to `qcom_unregister_ssr_notifier' Make SLIM_QCOM_NGD_CTRL depends on QCOM_RPROC_COMMON || (COMPILE_TEST && !QCOM_RPROC_COMMON) to fix this. Fixes: e291691c6977 ("slimbus: qcom-ngd-ctrl: allow compile testing without QCOM_RPROC_COMMON") Cc: stable Signed-off-by: Zheng Bin Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221027095904.3388959-1-zhengbin13@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/slimbus/Kconfig b/drivers/slimbus/Kconfig index 2ed821f75816..a0fdf9d792cb 100644 --- a/drivers/slimbus/Kconfig +++ b/drivers/slimbus/Kconfig @@ -23,7 +23,7 @@ config SLIM_QCOM_CTRL config SLIM_QCOM_NGD_CTRL tristate "Qualcomm SLIMbus Satellite Non-Generic Device Component" depends on HAS_IOMEM && DMA_ENGINE && NET - depends on QCOM_RPROC_COMMON || COMPILE_TEST + depends on QCOM_RPROC_COMMON || (COMPILE_TEST && !QCOM_RPROC_COMMON) depends on ARCH_QCOM || COMPILE_TEST select QCOM_QMI_HELPERS select QCOM_PDR_HELPERS From ee424f7d3960152f5f862bbb6943e59828dc7917 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 4 Nov 2022 17:52:03 +0100 Subject: [PATCH 161/963] nvmem: u-boot-env: fix crc32_data_offset on redundant u-boot-env The Western Digital MyBook Live (PowerPC 464/APM82181) has a set of redundant u-boot-env. Loading up the driver the following error: | u_boot_env: Invalid calculated CRC32: 0x4f8f2c86 (expected: 0x98b14514) | u_boot_env: probe of partition@1e000 failed with error -22 Looking up the userspace libubootenv utilities source [0], it looks like the "mark" or "flag" is not part of the crc32 sum... which is unfortunate :( |static int libuboot_load(struct uboot_ctx *ctx) |{ |[...] | if (ctx->redundant) { | [...] | offsetdata = offsetof(struct uboot_env_redund, data); | [...] //-----^^ | } | usable_envsize = ctx->size - offsetdata; | buf[0] = malloc(bufsize); |[...] | for (i = 0; i < copies; i++) { | data = (uint8_t *)(buf[i] + offsetdata); | uint32_t crc; | | ret = devread(ctx, i, buf[i]); | [...] | crc = *(uint32_t *)(buf[i] + offsetcrc); | dev->crc = crc32(0, (uint8_t *)data, usable_envsize); | [0] https://github.com/sbabic/libubootenv/blob/master/src/uboot_env.c#L951 Fixes: d5542923f200 ("nvmem: add driver handling U-Boot environment variables") Signed-off-by: Christian Lamparter Link: https://lore.kernel.org/r/70a16eae113e08db2390b76e174f4837caa135c3.1667580636.git.chunkeey@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/u-boot-env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/u-boot-env.c b/drivers/nvmem/u-boot-env.c index 8e72d1bbd649..4fdbdccebda1 100644 --- a/drivers/nvmem/u-boot-env.c +++ b/drivers/nvmem/u-boot-env.c @@ -135,7 +135,7 @@ static int u_boot_env_parse(struct u_boot_env *priv) break; case U_BOOT_FORMAT_REDUNDANT: crc32_offset = offsetof(struct u_boot_env_image_redundant, crc32); - crc32_data_offset = offsetof(struct u_boot_env_image_redundant, mark); + crc32_data_offset = offsetof(struct u_boot_env_image_redundant, data); data_offset = offsetof(struct u_boot_env_image_redundant, data); break; } From 65946690ed8d972fdb91a74ee75ac0f0f0d68321 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 19 Oct 2022 18:10:53 -0700 Subject: [PATCH 162/963] firmware: coreboot: Register bus in module init The coreboot_table driver registers a coreboot bus while probing a "coreboot_table" device representing the coreboot table memory region. Probing this device (i.e., registering the bus) is a dependency for the module_init() functions of any driver for this bus (e.g., memconsole-coreboot.c / memconsole_driver_init()). With synchronous probe, this dependency works OK, as the link order in the Makefile ensures coreboot_table_driver_init() (and thus, coreboot_table_probe()) completes before a coreboot device driver tries to add itself to the bus. With asynchronous probe, however, coreboot_table_probe() may race with memconsole_driver_init(), and so we're liable to hit one of these two: 1. coreboot_driver_register() eventually hits "[...] the bus was not initialized.", and the memconsole driver fails to register; or 2. coreboot_driver_register() gets past #1, but still races with bus_register() and hits some other undefined/crashing behavior (e.g., in driver_find() [1]) We can resolve this by registering the bus in our initcall, and only deferring "device" work (scanning the coreboot memory region and creating sub-devices) to probe(). [1] Example failure, using 'driver_async_probe=*' kernel command line: [ 0.114217] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 ... [ 0.114307] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc1 #63 [ 0.114316] Hardware name: Google Scarlet (DT) ... [ 0.114488] Call trace: [ 0.114494] _raw_spin_lock+0x34/0x60 [ 0.114502] kset_find_obj+0x28/0x84 [ 0.114511] driver_find+0x30/0x50 [ 0.114520] driver_register+0x64/0x10c [ 0.114528] coreboot_driver_register+0x30/0x3c [ 0.114540] memconsole_driver_init+0x24/0x30 [ 0.114550] do_one_initcall+0x154/0x2e0 [ 0.114560] do_initcall_level+0x134/0x160 [ 0.114571] do_initcalls+0x60/0xa0 [ 0.114579] do_basic_setup+0x28/0x34 [ 0.114588] kernel_init_freeable+0xf8/0x150 [ 0.114596] kernel_init+0x2c/0x12c [ 0.114607] ret_from_fork+0x10/0x20 [ 0.114624] Code: 5280002b 1100054a b900092a f9800011 (885ffc01) [ 0.114631] ---[ end trace 0000000000000000 ]--- Fixes: b81e3140e412 ("firmware: coreboot: Make bus registration symmetric") Cc: Signed-off-by: Brian Norris Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20221019180934.1.If29e167d8a4771b0bf4a39c89c6946ed764817b9@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/coreboot_table.c | 37 +++++++++++++++++++----- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c index c52bcaa9def6..9ca21feb9d45 100644 --- a/drivers/firmware/google/coreboot_table.c +++ b/drivers/firmware/google/coreboot_table.c @@ -149,12 +149,8 @@ static int coreboot_table_probe(struct platform_device *pdev) if (!ptr) return -ENOMEM; - ret = bus_register(&coreboot_bus_type); - if (!ret) { - ret = coreboot_table_populate(dev, ptr); - if (ret) - bus_unregister(&coreboot_bus_type); - } + ret = coreboot_table_populate(dev, ptr); + memunmap(ptr); return ret; @@ -169,7 +165,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy) static int coreboot_table_remove(struct platform_device *pdev) { bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister); - bus_unregister(&coreboot_bus_type); return 0; } @@ -199,6 +194,32 @@ static struct platform_driver coreboot_table_driver = { .of_match_table = of_match_ptr(coreboot_of_match), }, }; -module_platform_driver(coreboot_table_driver); + +static int __init coreboot_table_driver_init(void) +{ + int ret; + + ret = bus_register(&coreboot_bus_type); + if (ret) + return ret; + + ret = platform_driver_register(&coreboot_table_driver); + if (ret) { + bus_unregister(&coreboot_bus_type); + return ret; + } + + return 0; +} + +static void __exit coreboot_table_driver_exit(void) +{ + platform_driver_unregister(&coreboot_table_driver); + bus_unregister(&coreboot_bus_type); +} + +module_init(coreboot_table_driver_init); +module_exit(coreboot_table_driver_exit); + MODULE_AUTHOR("Google, Inc."); MODULE_LICENSE("GPL"); From 7c0f8f1462c9edeaa202a2cbea1bde0960434b09 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Thu, 10 Nov 2022 09:44:06 +0100 Subject: [PATCH 163/963] ASoC: stm32: i2s: remove irqf_oneshot flag The IRQF_ONESHOT flag allows to ensure that the interrupt is not unmasked after the hard interrupt context handler has been executed and the thread has been woken. The interrupt line is unmasked after the thread handler function has been executed. The STM32 I2S driver does not implement a threaded IRQ handler. So, the IRQF_ONESHOT flag is not useful in I2S driver. Remove this flag to allow the interrupt routine to be managed as a thread in RT mode. Signed-off-by: Olivier Moysan Link: https://lore.kernel.org/r/20221110084406.287117-1-olivier.moysan@foss.st.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index ce7f6942308f..f3dd9f8e621c 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -1077,7 +1077,7 @@ static int stm32_i2s_parse_dt(struct platform_device *pdev, if (irq < 0) return irq; - ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, IRQF_ONESHOT, + ret = devm_request_irq(&pdev->dev, irq, stm32_i2s_isr, 0, dev_name(&pdev->dev), i2s); if (ret) { dev_err(&pdev->dev, "irq request returned %d\n", ret); From 37882100cd0629d830db430a8cee0b724fe1fea3 Mon Sep 17 00:00:00 2001 From: Junxiao Chang Date: Thu, 10 Nov 2022 07:40:23 +0800 Subject: [PATCH 164/963] ASoC: hdac_hda: fix hda pcm buffer overflow issue When KASAN is enabled, below log might be dumped with Intel EHL hardware: [ 48.583597] ================================================================== [ 48.585921] BUG: KASAN: slab-out-of-bounds in hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.587995] Write of size 4 at addr ffff888103489708 by task pulseaudio/759 [ 48.589237] CPU: 2 PID: 759 Comm: pulseaudio Tainted: G U E 5.15.71-intel-ese-standard-lts #9 [ 48.591272] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T3 CRB, BIOS EHLSFWI1.R00.4251.A01.2206130432 06/13/2022 [ 48.593010] Call Trace: [ 48.593648] [ 48.593852] dump_stack_lvl+0x34/0x48 [ 48.594404] print_address_description.constprop.0+0x1f/0x140 [ 48.595174] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.595868] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.596519] kasan_report.cold+0x7f/0x11b [ 48.597003] ? hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] [ 48.597885] hdac_hda_dai_hw_params+0x20a/0x22b [snd_soc_hdac_hda] HDAC_LAST_DAI_ID is last index id, pcm buffer array size should be +1 to avoid out of bound access. Fixes: 608b8c36c371 ("ASoC: hdac_hda: add support for HDMI/DP as a HDA codec") Reviewed-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Signed-off-by: Junxiao Chang Signed-off-by: Furong Zhou Link: https://lore.kernel.org/r/20221109234023.3111035-1-junxiao.chang@intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/hdac_hda.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdac_hda.h b/sound/soc/codecs/hdac_hda.h index fc19c34ca00e..b65560981abb 100644 --- a/sound/soc/codecs/hdac_hda.h +++ b/sound/soc/codecs/hdac_hda.h @@ -14,7 +14,7 @@ enum { HDAC_HDMI_1_DAI_ID, HDAC_HDMI_2_DAI_ID, HDAC_HDMI_3_DAI_ID, - HDAC_LAST_DAI_ID = HDAC_HDMI_3_DAI_ID, + HDAC_DAI_ID_NUM }; struct hdac_hda_pcm { @@ -24,7 +24,7 @@ struct hdac_hda_pcm { struct hdac_hda_priv { struct hda_codec *codec; - struct hdac_hda_pcm pcm[HDAC_LAST_DAI_ID]; + struct hdac_hda_pcm pcm[HDAC_DAI_ID_NUM]; bool need_display_power; }; From 1edfe4ea16ca2c9e91ec6cecd446b13636724e4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Oct 2022 13:54:16 -1000 Subject: [PATCH 165/963] kernfs: Fix spurious lockdep warning in kernfs_find_and_get_node_by_id() c25491747b21 ("kernfs: Add KERNFS_REMOVING flags") made kernfs_find_and_get_node_by_id() test kernfs_active() instead of KERNFS_ACTIVATED. kernfs_find_and_get_by_id() is called without holding the kernfs_rwsem triggering the following lockdep warning. WARNING: CPU: 1 PID: 6191 at fs/kernfs/dir.c:36 kernfs_active+0xe8/0x120 fs/kernfs/dir.c:38 Modules linked in: CPU: 1 PID: 6191 Comm: syz-executor.1 Not tainted 6.0.0-syzkaller-09413-g4899a36f91a9 #0 Hardware name: linux,dummy-virt (DT) pstate: 10000005 (nzcV daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : kernfs_active+0xe8/0x120 fs/kernfs/dir.c:36 lr : lock_is_held include/linux/lockdep.h:283 [inline] lr : kernfs_active+0x94/0x120 fs/kernfs/dir.c:36 sp : ffff8000182c7a00 x29: ffff8000182c7a00 x28: 0000000000000002 x27: 0000000000000001 x26: ffff00000ee1f6a8 x25: 1fffe00001dc3ed5 x24: 0000000000000000 x23: ffff80000ca1fba0 x22: ffff8000089efcb0 x21: 0000000000000001 x20: ffff0000091181d0 x19: ffff0000091181d0 x18: ffff00006a9e6b88 x17: 0000000000000000 x16: 0000000000000000 x15: ffff00006a9e6bc4 x14: 1ffff00003058f0e x13: 1fffe0000258c816 x12: ffff700003058f39 x11: 1ffff00003058f38 x10: ffff700003058f38 x9 : dfff800000000000 x8 : ffff80000e482f20 x7 : ffff0000091d8058 x6 : ffff80000e482c60 x5 : ffff000009402ee8 x4 : 1ffff00001bd1f46 x3 : 1fffe0000258c6d1 x2 : 0000000000000003 x1 : 00000000000000c0 x0 : 0000000000000000 Call trace: kernfs_active+0xe8/0x120 fs/kernfs/dir.c:38 kernfs_find_and_get_node_by_id+0x6c/0x140 fs/kernfs/dir.c:708 __kernfs_fh_to_dentry fs/kernfs/mount.c:102 [inline] kernfs_fh_to_dentry+0x88/0x1fc fs/kernfs/mount.c:128 exportfs_decode_fh_raw+0x104/0x560 fs/exportfs/expfs.c:435 exportfs_decode_fh+0x10/0x5c fs/exportfs/expfs.c:575 do_handle_to_path fs/fhandle.c:152 [inline] handle_to_path fs/fhandle.c:207 [inline] do_handle_open+0x2a4/0x7b0 fs/fhandle.c:223 __do_compat_sys_open_by_handle_at fs/fhandle.c:277 [inline] __se_compat_sys_open_by_handle_at fs/fhandle.c:274 [inline] __arm64_compat_sys_open_by_handle_at+0x6c/0x9c fs/fhandle.c:274 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall+0x6c/0x260 arch/arm64/kernel/syscall.c:52 el0_svc_common.constprop.0+0xc4/0x254 arch/arm64/kernel/syscall.c:142 do_el0_svc_compat+0x40/0x70 arch/arm64/kernel/syscall.c:212 el0_svc_compat+0x54/0x140 arch/arm64/kernel/entry-common.c:772 el0t_32_sync_handler+0x90/0x140 arch/arm64/kernel/entry-common.c:782 el0t_32_sync+0x190/0x194 arch/arm64/kernel/entry.S:586 irq event stamp: 232 hardirqs last enabled at (231): [] raw_spin_rq_unlock_irq kernel/sched/sched.h:1367 [inline] hardirqs last enabled at (231): [] finish_lock_switch kernel/sched/core.c:4943 [inline] hardirqs last enabled at (231): [] finish_task_switch.isra.0+0x200/0x880 kernel/sched/core.c:5061 hardirqs last disabled at (232): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:404 softirqs last enabled at (228): [] _stext+0x938/0xf58 softirqs last disabled at (207): [] ____do_softirq+0x10/0x20 arch/arm64/kernel/irq.c:79 ---[ end trace 0000000000000000 ]--- The lockdep warning in kernfs_active() is there to ensure that the activated state stays stable for the caller. For kernfs_find_and_get_node_by_id(), all that's needed is ensuring that a node which has never been activated can't be looked up and guaranteeing lookup success when the caller knows the node to be active, both of which can be achieved by testing the active count without holding the kernfs_rwsem. Fix the spurious warning by introducing __kernfs_active() which doesn't have the lockdep annotation. Signed-off-by: Tejun Heo Reported-by: syzbot+590ce62b128e79cf0a35@syzkaller.appspotmail.com Fixes: c25491747b21 ("kernfs: Add KERNFS_REMOVING flags") Cc: Amir Goldstein Cc: Dmitry Vyukov Link: https://lore.kernel.org/r/Y0SwqBsZ9BMmZv6x@slm.duckdns.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 3990f3e270cb..f33b3baad07c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -31,10 +31,15 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool __kernfs_active(struct kernfs_node *kn) +{ + return atomic_read(&kn->active) >= 0; +} + static bool kernfs_active(struct kernfs_node *kn) { lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); - return atomic_read(&kn->active) >= 0; + return __kernfs_active(kn); } static bool kernfs_lockdep(struct kernfs_node *kn) @@ -705,7 +710,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, goto err_unlock; } - if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) + /* + * We should fail if @kn has never been activated and guarantee success + * if the caller knows that @kn is active. Both can be achieved by + * __kernfs_active() which tests @kn->active without kernfs_rwsem. + */ + if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; spin_unlock(&kernfs_idr_lock); From 7fdba0011157861892c470995ff586a1871e603f Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Wed, 9 Nov 2022 17:40:25 -0800 Subject: [PATCH 166/963] vfio: Fix container device registration life cycle In vfio_device_open(), vfio_device_container_register() is always called when open_count == 1. On error, vfio_device_container_unregister() is only called when open_count == 1 and close_device is set. This leaks a registration for devices without a close_device implementation. In vfio_device_fops_release(), vfio_device_container_unregister() is called unconditionally. This can cause a device to be unregistered multiple times. Treating container device registration/unregistration uniformly (always when open_count == 1) fixes both issues. Fixes: ce4b4657ff18 ("vfio: Replace the DMA unmapping notifier with a callback") Signed-off-by: Anthony DeRossi Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20221110014027.28780-2-ajderossi@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 2d168793d4e1..9a4af880e941 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -801,8 +801,9 @@ static struct file *vfio_device_open(struct vfio_device *device) err_close_device: mutex_lock(&device->dev_set->lock); mutex_lock(&device->group->group_lock); - if (device->open_count == 1 && device->ops->close_device) { - device->ops->close_device(device); + if (device->open_count == 1) { + if (device->ops->close_device) + device->ops->close_device(device); vfio_device_container_unregister(device); } @@ -1017,10 +1018,12 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); mutex_lock(&device->group->group_lock); - if (device->open_count == 1 && device->ops->close_device) - device->ops->close_device(device); + if (device->open_count == 1) { + if (device->ops->close_device) + device->ops->close_device(device); - vfio_device_container_unregister(device); + vfio_device_container_unregister(device); + } mutex_unlock(&device->group->group_lock); device->open_count--; if (device->open_count == 0) From 5cd189e410debedda416fecfc12f4716b5829845 Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Wed, 9 Nov 2022 17:40:26 -0800 Subject: [PATCH 167/963] vfio: Export the device set open count The open count of a device set is the sum of the open counts of all devices in the set. Drivers can use this value to determine whether shared resources are in use without tracking them manually or accessing the private open_count in vfio_device. Signed-off-by: Anthony DeRossi Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20221110014027.28780-3-ajderossi@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_main.c | 13 +++++++++++++ include/linux/vfio.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 9a4af880e941..6e8804fe0095 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -125,6 +125,19 @@ static void vfio_release_device_set(struct vfio_device *device) xa_unlock(&vfio_device_set_xa); } +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) +{ + struct vfio_device *cur; + unsigned int open_count = 0; + + lockdep_assert_held(&dev_set->lock); + + list_for_each_entry(cur, &dev_set->device_list, dev_set_list) + open_count += cur->open_count; + return open_count; +} +EXPORT_SYMBOL_GPL(vfio_device_set_open_count); + /* * Group objects - create, release, get, put, search */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index e7cebeb875dd..fdd393f70b19 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -189,6 +189,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device); void vfio_unregister_group_dev(struct vfio_device *device); int vfio_assign_device_set(struct vfio_device *device, void *set_id); +unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set); int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state cur_fsm, From e806e223621e4f5105170df69d7311dc3fb4bbb4 Mon Sep 17 00:00:00 2001 From: Anthony DeRossi Date: Wed, 9 Nov 2022 17:40:27 -0800 Subject: [PATCH 168/963] vfio/pci: Check the device set open count on reset vfio_pci_dev_set_needs_reset() inspects the open_count of every device in the set to determine whether a reset is allowed. The current device always has open_count == 1 within vfio_pci_core_disable(), effectively disabling the reset logic. This field is also documented as private in vfio_device, so it should not be used to determine whether other devices in the set are open. Checking for vfio_device_set_open_count() > 1 on the device set fixes both issues. After commit 2cd8b14aaa66 ("vfio/pci: Move to the device set infrastructure"), failure to create a new file for a device would cause the reset to be skipped due to open_count being decremented after calling close_device() in the error path. After commit eadd86f835c6 ("vfio: Remove calls to vfio_group_add_container_user()"), releasing a device would always skip the reset due to an ordering change in vfio_device_fops_release(). Failing to reset the device leaves it in an unknown state, potentially causing errors when it is accessed later or bound to a different driver. This issue was observed with a Radeon RX Vega 56 [1002:687f] (rev c3) assigned to a Windows guest. After shutting down the guest, unbinding the device from vfio-pci, and binding the device to amdgpu: [ 548.007102] [drm:psp_hw_start [amdgpu]] *ERROR* PSP create ring failed! [ 548.027174] [drm:psp_hw_init [amdgpu]] *ERROR* PSP firmware loading failed [ 548.027242] [drm:amdgpu_device_fw_loading [amdgpu]] *ERROR* hw_init of IP block failed -22 [ 548.027306] amdgpu 0000:0a:00.0: amdgpu: amdgpu_device_ip_init failed [ 548.027308] amdgpu 0000:0a:00.0: amdgpu: Fatal error during GPU init Fixes: 2cd8b14aaa66 ("vfio/pci: Move to the device set infrastructure") Fixes: eadd86f835c6 ("vfio: Remove calls to vfio_group_add_container_user()") Signed-off-by: Anthony DeRossi Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20221110014027.28780-4-ajderossi@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index badc9d828cac..e030c2120183 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -2488,12 +2488,12 @@ static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set) struct vfio_pci_core_device *cur; bool needs_reset = false; - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { - /* No VFIO device in the set can have an open device FD */ - if (cur->vdev.open_count) - return false; + /* No other VFIO device in the set can be open. */ + if (vfio_device_set_open_count(dev_set) > 1) + return false; + + list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) needs_reset |= cur->needs_reset; - } return needs_reset; } From 0bb8e9b36b5b7f2e77892981ff6c27ee831d8026 Mon Sep 17 00:00:00 2001 From: Detlev Casanova Date: Thu, 10 Nov 2022 14:06:12 -0500 Subject: [PATCH 169/963] ASoC: sgtl5000: Reset the CHIP_CLK_CTRL reg on remove Since commit bf2aebccddef ("ASoC: sgtl5000: Fix noise on shutdown/remove"), the device power control registers are reset when the driver is removed/shutdown. This is an issue when the device is configured to use the PLL clock. The device will stop responding if it is still configured to use the PLL clock but the PLL clock is powered down. When rebooting linux, the probe function will show: sgtl5000 0-000a: Error reading chip id -11 Make sure that the CHIP_CLK_CTRL is reset to its default value before powering down the device. Fixes: bf2aebccddef ("ASoC: sgtl5000: Fix noise on shutdown/remove") Signed-off-by: Detlev Casanova Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/20221110190612.1341469-1-detlev.casanova@collabora.com Signed-off-by: Mark Brown --- sound/soc/codecs/sgtl5000.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 4b2135eba74d..a916f4619ea3 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1794,6 +1794,7 @@ static void sgtl5000_i2c_remove(struct i2c_client *client) { struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client); + regmap_write(sgtl5000->regmap, SGTL5000_CHIP_CLK_CTRL, SGTL5000_CHIP_CLK_CTRL_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT); regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT); From 39bd801d6908900e9ab0cdc2655150f95ddd4f1a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Fri, 4 Nov 2022 13:22:13 +0000 Subject: [PATCH 170/963] ASoC: soc-pcm: Don't zero TDM masks in __soc_pcm_open() The DAI tx_mask and rx_mask are set by snd_soc_dai_set_tdm_slot() and used by later code that depends on the TDM settings. So __soc_pcm_open() should not be obliterating those mask values. The code in __soc_pcm_hw_params() uses these masks to calculate the active channels so that only the AIF_IN/AIF_OUT widgets for the active TDM slots are enabled. The zeroing of the masks in __soc_pcm_open() disables this functionality so all AIF widgets were enabled even for channels that are not assigned to a TDM slot. Signed-off-by: Richard Fitzgerald Fixes: 2e5894d73789 ("ASoC: pcm: Add support for DAI multicodec") Link: https://lore.kernel.org/r/20221104132213.121847-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index fb87d6d23408..8ceded22a3c4 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -822,11 +822,6 @@ static int __soc_pcm_open(struct snd_soc_pcm_runtime *rtd, ret = snd_soc_dai_startup(dai, substream); if (ret < 0) goto err; - - if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - dai->tx_mask = 0; - else - dai->rx_mask = 0; } /* Dynamic PCM DAI links compat checks use dynamic capabilities */ From ff963634f7b2e0dc011349abb3fb81a0d074f443 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Tue, 1 Nov 2022 15:07:15 +0800 Subject: [PATCH 171/963] drm/drv: Fix potential memory leak in drm_dev_init() drm_dev_init() will add drm_dev_init_release() as a callback. When drmm_add_action() failed, the release function won't be added. As the result, the ref cnt added by device_get() in drm_dev_init() won't be put by drm_dev_init_release(), which leads to the memleak. Use drmm_add_action_or_reset() instead of drmm_add_action() to prevent memleak. unreferenced object 0xffff88810bc0c800 (size 2048): comm "modprobe", pid 8322, jiffies 4305809845 (age 15.292s) hex dump (first 32 bytes): e8 cc c0 0b 81 88 ff ff ff ff ff ff 00 00 00 00 ................ 20 24 3c 0c 81 88 ff ff 18 c8 c0 0b 81 88 ff ff $<............. backtrace: [<000000007251f72d>] __kmalloc+0x4b/0x1c0 [<0000000045f21f26>] platform_device_alloc+0x2d/0xe0 [<000000004452a479>] platform_device_register_full+0x24/0x1c0 [<0000000089f4ea61>] 0xffffffffa0736051 [<00000000235b2441>] do_one_initcall+0x7a/0x380 [<0000000001a4a177>] do_init_module+0x5c/0x230 [<000000002bf8a8e2>] load_module+0x227d/0x2420 [<00000000637d6d0a>] __do_sys_finit_module+0xd5/0x140 [<00000000c99fc324>] do_syscall_64+0x3f/0x90 [<000000004d85aa77>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 2cbf7fc6718b ("drm: Use drmm_ for drm_dev_init cleanup") Signed-off-by: Shang XiaoJing Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20221101070716.9189-2-shangxiaojing@huawei.com --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 8214a0b1ab7f..203bf8d6c34c 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -615,7 +615,7 @@ static int drm_dev_init(struct drm_device *dev, mutex_init(&dev->clientlist_mutex); mutex_init(&dev->master_mutex); - ret = drmm_add_action(dev, drm_dev_init_release, NULL); + ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL); if (ret) return ret; From 4979524f5a2a8210e87fde2f642b0dc060860821 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Tue, 1 Nov 2022 15:07:16 +0800 Subject: [PATCH 172/963] drm: Fix potential null-ptr-deref in drm_vblank_destroy_worker() drm_vblank_init() call drmm_add_action_or_reset() with drm_vblank_init_release() as action. If __drmm_add_action() failed, will directly call drm_vblank_init_release() with the vblank whose worker is NULL. As the resule, a null-ptr-deref will happen in kthread_destroy_worker(). Add the NULL check before calling drm_vblank_destroy_worker(). BUG: null-ptr-deref KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f] CPU: 5 PID: 961 Comm: modprobe Not tainted 6.0.0-11331-gd465bff130bf-dirty RIP: 0010:kthread_destroy_worker+0x25/0xb0 Call Trace: drm_vblank_init_release+0x124/0x220 [drm] ? drm_crtc_vblank_restore+0x8b0/0x8b0 [drm] __drmm_add_action_or_reset+0x41/0x50 [drm] drm_vblank_init+0x282/0x310 [drm] vkms_init+0x35f/0x1000 [vkms] ? 0xffffffffc4508000 ? lock_is_held_type+0xd7/0x130 ? __kmem_cache_alloc_node+0x1c2/0x2b0 ? lock_is_held_type+0xd7/0x130 ? 0xffffffffc4508000 do_one_initcall+0xd0/0x4f0 ... do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 5e6c2b4f9161 ("drm/vblank: Add vblank works") Signed-off-by: Shang XiaoJing Reviewed-by: Lyude Paul Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20221101070716.9189-3-shangxiaojing@huawei.com --- drivers/gpu/drm/drm_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 7bb98e6a446d..5ea5e260118c 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank) static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank) { - kthread_destroy_worker(vblank->worker); + if (vblank->worker) + kthread_destroy_worker(vblank->worker); } int drm_vblank_worker_init(struct drm_vblank_crtc *vblank); From 63fd9437ec81899fc36bb642d558378bc89aa4f9 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Tue, 1 Nov 2022 10:30:31 +0100 Subject: [PATCH 173/963] arm64: dts: imx8mm-tqma8mqml-mba8mx: Fix USB DR Using extcon USB host mode works properly on DR interface, e.g. enabling/disabling VBUS. But USB device mode is not working. Fix this by switching to usb-role-switch instead. Fixes: dfcd1b6f7620 ("arm64: dts: freescale: add initial device tree for TQMa8MQML with i.MX8MM") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- .../dts/freescale/imx8mm-tqma8mqml-mba8mx.dts | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts index 7e0aeb2db305..a0aeac619929 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts @@ -34,11 +34,25 @@ reg_usdhc2_vmmc: regulator-vmmc { off-on-delay-us = <12000>; }; - extcon_usbotg1: extcon-usbotg1 { - compatible = "linux,extcon-usb-gpio"; + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + type = "micro"; + label = "X19"; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usb1_extcon>; - id-gpio = <&gpio1 10 GPIO_ACTIVE_HIGH>; + pinctrl-0 = <&pinctrl_usb1_connector>; + id-gpios = <&gpio1 10 GPIO_ACTIVE_HIGH>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + usb_dr_connector: endpoint { + remote-endpoint = <&usb1_drd_sw>; + }; + }; + }; }; }; @@ -105,13 +119,19 @@ &usbotg1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usbotg1>; dr_mode = "otg"; - extcon = <&extcon_usbotg1>; srp-disable; hnp-disable; adp-disable; power-active-high; over-current-active-low; + usb-role-switch; status = "okay"; + + port { + usb1_drd_sw: endpoint { + remote-endpoint = <&usb_dr_connector>; + }; + }; }; &usbotg2 { @@ -231,7 +251,7 @@ pinctrl_usbotg1: usbotg1grp { ; }; - pinctrl_usb1_extcon: usb1-extcongrp { + pinctrl_usb1_connector: usb1-connectorgrp { fsl,pins = ; }; From 9ed1fdee9ee324f3505ff066287ee53143caaaa2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Nov 2022 00:22:24 +0000 Subject: [PATCH 174/963] drm/i915/gvt: Get reference to KVM iff attachment to VM is successful Get a reference to KVM if and only if a vGPU is successfully attached to the VM to avoid leaking a reference if there's no available vGPU. On open_device() failure, vfio_device_open() doesn't invoke close_device(). Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM") Cc: stable@vger.kernel.org Reviewed-by: Kevin Tian Signed-off-by: Sean Christopherson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221111002225.2418386-2-seanjc@google.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..e67d5267fde0 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,8 +664,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; @@ -676,6 +674,7 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); From 3c9fd44b9330adc5006653566f3d386784b2080e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Nov 2022 00:22:25 +0000 Subject: [PATCH 175/963] drm/i915/gvt: Unconditionally put reference to KVM when detaching vGPU Always put the KVM reference when closing a vCPU device, as intel_vgpu_open_device() succeeds if and only if the KVM pointer is valid and a reference to KVM is acquired. And if that doesn't hold true, the call to kvm_page_track_unregister_notifier() a few lines earlier is doomed. Reviewed-by: Kevin Tian Signed-off-by: Sean Christopherson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221111002225.2418386-3-seanjc@google.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e67d5267fde0..714221f9a131 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -714,15 +714,14 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) From 753395ea1e45c724150070b5785900b6a44bd5fb Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:45 +0100 Subject: [PATCH 176/963] ARM: dts: imx7: Fix NAND controller size-cells The NAND controller size-cells should be 0 per DT bindings. Fix the following warning produces by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fix the missing space in node name too. Fixes: e7495a45a76de ("ARM: dts: imx7: add GPMI NAND and APBH DMA") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7s.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 0fc9e6b8b05d..03d2e8544a4e 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1270,10 +1270,10 @@ dma_apbh: dma-apbh@33000000 { clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From 1610233bc2c2cae2dff9e101e6ea5ef69cceb0e9 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:46 +0100 Subject: [PATCH 177/963] arm64: dts: imx8mm: Fix NAND controller size-cells The NAND controller size-cells should be 0 per DT bindings. Fix the following warning produces by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fix the missing space in node name too. Fixes: a05ea40eb384e ("arm64: dts: imx: Add i.mx8mm dtsi support") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index dabd94dc30c4..50ef92915c67 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -1244,10 +1244,10 @@ dma_apbh: dma-controller@33000000 { clocks = <&clk IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000{ + gpmi: nand-controller@33002000 { compatible = "fsl,imx8mm-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From 5468e93b5b1083eaa729f98e59da18c85d9c4126 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 2 Nov 2022 20:19:47 +0100 Subject: [PATCH 178/963] arm64: dts: imx8mn: Fix NAND controller size-cells The NAND controller size-cells should be 0 per DT bindings. Fix the following warning produces by DT bindings check: " nand-controller@33002000: #size-cells:0:0: 0 was expected nand-controller@33002000: Unevaluated properties are not allowed ('#address-cells', '#size-cells' were unexpected) " Fixes: 6c3debcbae47a ("arm64: dts: freescale: Add i.MX8MN dtsi support") Signed-off-by: Marek Vasut Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ad0b99adf691..67b554ba690c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1102,7 +1102,7 @@ dma_apbh: dma-controller@33000000 { gpmi: nand-controller@33002000 { compatible = "fsl,imx8mn-gpmi-nand", "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <1>; + #size-cells = <0>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From 2db1fdb25d209a88112fd82eb493976d66057d10 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 4 Nov 2022 13:49:42 +0800 Subject: [PATCH 179/963] arm64: dts: imx93-pinfunc: drop execution permission Drop the header file execution permission Signed-off-by: Peng Fan Fixes: ec8b5b5058ea ("arm64: dts: freescale: Add i.MX93 dtsi support") Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-pinfunc.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 arch/arm64/boot/dts/freescale/imx93-pinfunc.h diff --git a/arch/arm64/boot/dts/freescale/imx93-pinfunc.h b/arch/arm64/boot/dts/freescale/imx93-pinfunc.h old mode 100755 new mode 100644 From f3a72878a3de720661b7ed0d6b7f7c506ddb8a52 Mon Sep 17 00:00:00 2001 From: Jaco Coetzee Date: Wed, 9 Nov 2022 15:27:57 -0500 Subject: [PATCH 180/963] nfp: change eeprom length to max length enumerators Extend the size of QSFP EEPROM for types SSF8436 and SFF8636 from 256 to 640 bytes in order to expose all the EEPROM pages by ethtool. For SFF-8636 and SFF-8436 specifications, the driver exposes 256 bytes of EEPROM data for ethtool's get_module_eeprom() callback, resulting in "netlink error: Invalid argument" when an EEPROM read with an offset larger than 256 bytes is attempted. Changing the length enumerators to the _MAX_LEN variants exposes all 640 bytes of the EEPROM allowing upper pages 1, 2 and 3 to be read. Fixes: 96d971e307cc ("ethtool: Add fallback to get_module_eeprom from netlink command") Signed-off-by: Jaco Coetzee Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 22a5d2419084..1775997f9c69 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1477,15 +1477,15 @@ nfp_port_get_module_info(struct net_device *netdev, if (data < 0x3) { modinfo->type = ETH_MODULE_SFF_8436; - modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN; } else { modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; } break; case NFP_INTERFACE_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; - modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN; break; default: netdev_err(netdev, "Unsupported module 0x%x detected\n", From 1dcdf5f5b2137185cbdd5385f29949ab3da4f00c Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Fri, 11 Nov 2022 15:12:12 +0800 Subject: [PATCH 181/963] cifs: Fix connections leak when tlink setup failed If the tlink setup failed, lost to put the connections, then the module refcnt leak since the cifsd kthread not exit. Also leak the fscache info, and for next mount with fsc, it will print the follow errors: CIFS: Cache volume key already in use (cifs,127.0.0.1:445,TEST) Let's check the result of tlink setup, and do some cleanup. Fixes: 56c762eb9bee ("cifs: Refactor out cifs_mount()") Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/cifs/connect.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1cc47dd3b4d6..9db9527c61cf 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3855,9 +3855,13 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: - free_xid(mnt_ctx.xid); cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + + free_xid(mnt_ctx.xid); + return rc; error: dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); @@ -3884,8 +3888,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) goto error; } + rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + if (rc) + goto error; + free_xid(mnt_ctx.xid); - return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); + return rc; error: mount_put_conns(&mnt_ctx); From 8678ea06852cd1f819b870c773d43df888d15d46 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:13 +0100 Subject: [PATCH 182/963] maccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault() If a page fault occurs while copying the first byte, this function resets one byte before dst. As a consequence, an address could be modified and leaded to kernel crashes if case the modified address was accessed later. Fixes: b58294ead14c ("maccess: allow architectures to provide kernel probing directly") Signed-off-by: Alban Crequy Signed-off-by: Andrii Nakryiko Tested-by: Francis Laniel Reviewed-by: Andrew Morton Cc: [5.8] Link: https://lore.kernel.org/bpf/20221110085614.111213-2-albancrequy@linux.microsoft.com --- mm/maccess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/maccess.c b/mm/maccess.c index 5f4d240f67ec..074f6b086671 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -97,7 +97,7 @@ long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count) return src - unsafe_addr; Efault: pagefault_enable(); - dst[-1] = '\0'; + dst[0] = '\0'; return -EFAULT; } From 9cd094829dae949a755c18533479c20e74415ab2 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Thu, 10 Nov 2022 09:56:14 +0100 Subject: [PATCH 183/963] selftests: bpf: Add a test when bpf_probe_read_kernel_str() returns EFAULT This commit tests previous fix of bpf_probe_read_kernel_str(). The BPF helper bpf_probe_read_kernel_str should return -EFAULT when given a bad source pointer and the target buffer should only be modified to make the string NULL terminated. bpf_probe_read_kernel_str() was previously inserting a NULL before the beginning of the dst buffer. This test should ensure that the implementation stays correct for now on. Without the fix, this test will fail as follows: $ cd tools/testing/selftests/bpf $ make $ sudo ./test_progs --name=varlen ... test_varlen:FAIL:check got 0 != exp 66 Signed-off-by: Alban Crequy Signed-off-by: Francis Laniel Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110085614.111213-3-albancrequy@linux.microsoft.com Changes v1 to v2: - add ack tag - fix my email - rebase on bpf tree and tag for bpf tree --- tools/testing/selftests/bpf/prog_tests/varlen.c | 7 +++++++ tools/testing/selftests/bpf/progs/test_varlen.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c index dd324b4933db..4d7056f8f177 100644 --- a/tools/testing/selftests/bpf/prog_tests/varlen.c +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -63,6 +63,13 @@ void test_varlen(void) CHECK_VAL(data->total4, size1 + size2); CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", "doesn't match!\n"); + + CHECK_VAL(bss->ret_bad_read, -EFAULT); + CHECK_VAL(data->payload_bad[0], 0x42); + CHECK_VAL(data->payload_bad[1], 0x42); + CHECK_VAL(data->payload_bad[2], 0); + CHECK_VAL(data->payload_bad[3], 0x42); + CHECK_VAL(data->payload_bad[4], 0x42); cleanup: test_varlen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 3987ff174f1f..20eb7d422c41 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -19,6 +19,7 @@ __u64 payload1_len1 = 0; __u64 payload1_len2 = 0; __u64 total1 = 0; char payload1[MAX_LEN + MAX_LEN] = {}; +__u64 ret_bad_read = 0; /* .data */ int payload2_len1 = -1; @@ -36,6 +37,8 @@ int payload4_len2 = -1; int total4= -1; char payload4[MAX_LEN + MAX_LEN] = { 1 }; +char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 }; + SEC("raw_tp/sys_enter") int handler64_unsigned(void *regs) { @@ -61,6 +64,8 @@ int handler64_unsigned(void *regs) total1 = payload - (void *)payload1; + ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1); + return 0; } From 4b45cd81f737d79d0fbfc0d320a1e518e7f0bbf0 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Thu, 10 Nov 2022 07:21:28 -0500 Subject: [PATCH 184/963] bpf: Initialize same number of free nodes for each pcpu_freelist pcpu_freelist_populate() initializes nr_elems / num_possible_cpus() + 1 free nodes for some CPUs, and then possibly one CPU with fewer nodes, followed by remaining cpus with 0 nodes. For example, when nr_elems == 256 and num_possible_cpus() == 32, CPU 0~27 each gets 9 free nodes, CPU 28 gets 4 free nodes, CPU 29~31 get 0 free nodes, while in fact each CPU should get 8 nodes equally. This patch initializes nr_elems / num_possible_cpus() free nodes for each CPU firstly, then allocates the remaining free nodes by one for each CPU until no free nodes left. Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20221110122128.105214-1-xukuohai@huawei.com --- kernel/bpf/percpu_freelist.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index b6e7f5c5b9ab..034cf87b54e9 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -100,22 +100,21 @@ void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size, u32 nr_elems) { struct pcpu_freelist_head *head; - int i, cpu, pcpu_entries; + unsigned int cpu, cpu_idx, i, j, n, m; - pcpu_entries = nr_elems / num_possible_cpus() + 1; - i = 0; + n = nr_elems / num_possible_cpus(); + m = nr_elems % num_possible_cpus(); + cpu_idx = 0; for_each_possible_cpu(cpu) { -again: head = per_cpu_ptr(s->freelist, cpu); - /* No locking required as this is not visible yet. */ - pcpu_freelist_push_node(head, buf); - i++; - buf += elem_size; - if (i == nr_elems) - break; - if (i % pcpu_entries) - goto again; + j = n + (cpu_idx < m ? 1 : 0); + for (i = 0; i < j; i++) { + /* No locking required as this is not visible yet. */ + pcpu_freelist_push_node(head, buf); + buf += elem_size; + } + cpu_idx++; } } From 1f6e04a1c7b85da3b765ca9f46029e5d1826d839 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 11 Nov 2022 07:56:20 -0500 Subject: [PATCH 185/963] bpf: Fix offset calculation error in __copy_map_value and zero_map_value Function __copy_map_value and zero_map_value miscalculated copy offset, resulting in possible copy of unwanted data to user or kernel. Fix it. Fixes: cc48755808c6 ("bpf: Add zero_map_value to zero map value with special fields") Fixes: 4d7d7f69f4b1 ("bpf: Adapt copy_map_value for multiple offset case") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20221111125620.754855-1-xukuohai@huaweicloud.com --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 74c6f449d81e..c1bd1bd10506 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,7 +315,7 @@ static inline void __copy_map_value(struct bpf_map *map, void *dst, void *src, b u32 next_off = map->off_arr->field_off[i]; memcpy(dst + curr_off, src + curr_off, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); } @@ -344,7 +344,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) u32 next_off = map->off_arr->field_off[i]; memset(dst + curr_off, 0, next_off - curr_off); - curr_off += map->off_arr->field_sz[i]; + curr_off = next_off + map->off_arr->field_sz[i]; } memset(dst + curr_off, 0, map->value_size - curr_off); } From ee6815416380bc069b7dcbdff0682d4c53617527 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 3 Nov 2022 20:06:01 +0100 Subject: [PATCH 186/963] x86/hyperv: Restore VP assist page after cpu offlining/onlining Commit e5d9b714fe40 ("x86/hyperv: fix root partition faults when writing to VP assist page MSR") moved 'wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE)' under 'if (*hvp)' condition. This works for root partition as hv_cpu_die() does memunmap() and sets 'hv_vp_assist_page[cpu]' to NULL but breaks non-root partitions as hv_cpu_die() doesn't free 'hv_vp_assist_page[cpu]' for them. This causes VP assist page to remain unset after CPU offline/online cycle: $ rdmsr -p 24 0x40000073 10212f001 $ echo 0 > /sys/devices/system/cpu/cpu24/online $ echo 1 > /sys/devices/system/cpu/cpu24/online $ rdmsr -p 24 0x40000073 0 Fix the issue by always writing to HV_X64_MSR_VP_ASSIST_PAGE in hv_cpu_init(). Note, checking 'if (!*hvp)', for root partition is pointless as hv_cpu_die() always sets 'hv_vp_assist_page[cpu]' to NULL (and it's also NULL initially). Note: the fact that 'hv_vp_assist_page[cpu]' is reset to NULL may present a (potential) issue for KVM. While Hyper-V uses CPUHP_AP_ONLINE_DYN stage in CPU hotplug, KVM uses CPUHP_AP_KVM_STARTING which comes earlier in CPU teardown sequence. It is theoretically possible that Enlightened VMCS is still in use. It is unclear if the issue is real and if using KVM with Hyper-V root partition is even possible. While on it, drop the unneeded smp_processor_id() call from hv_cpu_init(). Fixes: e5d9b714fe40 ("x86/hyperv: fix root partition faults when writing to VP assist page MSR") Signed-off-by: Vitaly Kuznetsov Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221103190601.399343-1-vkuznets@redhat.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 54 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index f49bc3ec76e6..a269049a43ce 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -77,7 +77,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; int ret; ret = hv_common_cpu_init(cpu); @@ -87,34 +87,32 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; - if (!*hvp) { - if (hv_root_partition) { - /* - * For root partition we get the hypervisor provided VP assist - * page, instead of allocating a new page. - */ - rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - *hvp = memremap(msr.pfn << - HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, - PAGE_SIZE, MEMREMAP_WB); - } else { - /* - * The VP assist page is an "overlay" page (see Hyper-V TLFS's - * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed - * out to make sure we always write the EOI MSR in - * hv_apic_eoi_write() *after* the EOI optimization is disabled - * in hv_cpu_die(), otherwise a CPU may not be stopped in the - * case of CPU offlining and the VM will hang. - */ + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + if (!*hvp) *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - if (*hvp) - msr.pfn = vmalloc_to_pfn(*hvp); - } - WARN_ON(!(*hvp)); - if (*hvp) { - msr.enable = 1; - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); - } + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + + } + if (!WARN_ON(!(*hvp))) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } return hyperv_init_ghcb(); From b8a5376c321b4669f7ffabc708fd30c3970f3084 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 9 Nov 2022 10:48:42 -0800 Subject: [PATCH 187/963] scsi: storvsc: Fix handling of srb_status and capacity change events Current handling of the srb_status is incorrect. Commit 52e1b3b3daa9 ("scsi: storvsc: Correctly handle multiple flags in srb_status") is based on srb_status being a set of flags, when in fact only the 2 high order bits are flags and the remaining 6 bits are an integer status. Because the integer values of interest mostly look like flags, the code actually works when treated that way. But in the interest of correctness going forward, fix this by treating the low 6 bits of srb_status as an integer status code. Add handling for SRB_STATUS_INVALID_REQUEST, which was the original intent of commit 52e1b3b3daa9. Furthermore, treat the ERROR, ABORTED, and INVALID_REQUEST srb status codes as essentially equivalent for the cases we care about. There's no harm in doing so, and it isn't always clear which status code current or older versions of Hyper-V report for particular conditions. Treating the srb status codes as equivalent has the additional benefit of ensuring that capacity change events result in an immediate rescan so that the new size is known to Linux. Existing code checks SCSI sense data for capacity change events when the srb status is ABORTED. But capacity change events are also being observed when Hyper-V reports the srb status as ERROR. Without the immediate rescan, the new size isn't known until something else causes a rescan (such as running fdisk to expand a partition), and in the meantime, tools such as "lsblk" continue to report the old size. Fixes: 52e1b3b3daa9 ("scsi: storvsc: Correctly handle multiple flags in srb_status") Reported-by: Juan Tian Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1668019722-1983-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/scsi/storvsc_drv.c | 69 +++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index bc46721aa01c..3c5b7e4227b2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -303,16 +303,21 @@ enum storvsc_request_type { }; /* - * SRB status codes and masks; a subset of the codes used here. + * SRB status codes and masks. In the 8-bit field, the two high order bits + * are flags, while the remaining 6 bits are an integer status code. The + * definitions here include only the subset of the integer status codes that + * are tested for in this driver. */ - #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_QUEUE_FROZEN 0x40 -#define SRB_STATUS_INVALID_LUN 0x20 -#define SRB_STATUS_SUCCESS 0x01 -#define SRB_STATUS_ABORTED 0x02 -#define SRB_STATUS_ERROR 0x04 -#define SRB_STATUS_DATA_OVERRUN 0x12 + +/* SRB status integer codes */ +#define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 +#define SRB_STATUS_ERROR 0x04 +#define SRB_STATUS_INVALID_REQUEST 0x06 +#define SRB_STATUS_DATA_OVERRUN 0x12 +#define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS(status) \ (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN)) @@ -969,38 +974,25 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, void (*process_err_fn)(struct work_struct *work); struct hv_host_device *host_dev = shost_priv(host); - /* - * In some situations, Hyper-V sets multiple bits in the - * srb_status, such as ABORTED and ERROR. So process them - * individually, with the most specific bits first. - */ + switch (SRB_STATUS(vm_srb->srb_status)) { + case SRB_STATUS_ERROR: + case SRB_STATUS_ABORTED: + case SRB_STATUS_INVALID_REQUEST: + if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) { + /* Check for capacity change */ + if ((asc == 0x2a) && (ascq == 0x9)) { + process_err_fn = storvsc_device_scan; + /* Retry the I/O that triggered this. */ + set_host_byte(scmnd, DID_REQUEUE); + goto do_work; + } - if (vm_srb->srb_status & SRB_STATUS_INVALID_LUN) { - set_host_byte(scmnd, DID_NO_CONNECT); - process_err_fn = storvsc_remove_lun; - goto do_work; - } - - if (vm_srb->srb_status & SRB_STATUS_ABORTED) { - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID && - /* Capacity data has changed */ - (asc == 0x2a) && (ascq == 0x9)) { - process_err_fn = storvsc_device_scan; /* - * Retry the I/O that triggered this. + * Otherwise, let upper layer deal with the + * error when sense message is present */ - set_host_byte(scmnd, DID_REQUEUE); - goto do_work; - } - } - - if (vm_srb->srb_status & SRB_STATUS_ERROR) { - /* - * Let upper layer deal with error when - * sense message is present. - */ - if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) return; + } /* * If there is an error; offline the device since all @@ -1023,6 +1015,13 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb, default: set_host_byte(scmnd, DID_ERROR); } + return; + + case SRB_STATUS_INVALID_LUN: + set_host_byte(scmnd, DID_NO_CONNECT); + process_err_fn = storvsc_remove_lun; + goto do_work; + } return; From 6f928ab8ee9bfbcb0e631c47ea8a16c3d5116ff1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 9 Nov 2022 15:01:36 +0000 Subject: [PATCH 188/963] net: bgmac: Drop free_netdev() from bgmac_enet_remove() netdev is allocated in bgmac_alloc() with devm_alloc_etherdev() and will be auto released in ->remove and ->probe failure path. Using free_netdev() in bgmac_enet_remove() leads to double free. Fixes: 34a5102c3235 ("net: bgmac: allocate struct bgmac just once & don't copy it") Signed-off-by: Wei Yongjun Link: https://lore.kernel.org/r/20221109150136.2991171-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bgmac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 5fb3af5670ec..3038386a5afd 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1568,7 +1568,6 @@ void bgmac_enet_remove(struct bgmac *bgmac) phy_disconnect(bgmac->net_dev->phydev); netif_napi_del(&bgmac->napi); bgmac_dma_free(bgmac); - free_netdev(bgmac->net_dev); } EXPORT_SYMBOL_GPL(bgmac_enet_remove); From 98a2ac1ca8fd6eca6867726fe238d06e75eb1acd Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 21:28:32 +0800 Subject: [PATCH 189/963] mISDN: fix possible memory leak in mISDN_dsp_element_register() Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array"), the name of device is allocated dynamically, use put_device() to give up the reference, so that the name can be freed in kobject_cleanup() when the refcount is 0. The 'entry' is going to be freed in mISDN_dsp_dev_release(), so the kfree() is removed. list_del() is called in mISDN_dsp_dev_release(), so it need be initialized. Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221109132832.3270119-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/dsp_pipeline.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index c3b2c99b5cd5..cfbcd9e973c2 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -77,6 +77,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) if (!entry) return -ENOMEM; + INIT_LIST_HEAD(&entry->list); entry->elem = elem; entry->dev.class = elements_class; @@ -107,7 +108,7 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) device_unregister(&entry->dev); return ret; err1: - kfree(entry); + put_device(&entry->dev); return ret; } EXPORT_SYMBOL(mISDN_dsp_element_register); From 8eab9be56cc6b702a445d2b6d0256aa0992316b3 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 10 Nov 2022 02:16:42 +0000 Subject: [PATCH 190/963] net: hinic: Fix error handling in hinic_module_init() A problem about hinic create debugfs failed is triggered with the following log given: [ 931.419023] debugfs: Directory 'hinic' with parent '/' already present! The reason is that hinic_module_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of hinic can never be created later. hinic_module_init() hinic_dbg_register_debugfs() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: 253ac3a97921 ("hinic: add support to query sq info") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221110021642.80378-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index e1f54a2f28b2..2d6906aba2a2 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1474,8 +1474,15 @@ static struct pci_driver hinic_driver = { static int __init hinic_module_init(void) { + int ret; + hinic_dbg_register_debugfs(HINIC_DRV_NAME); - return pci_register_driver(&hinic_driver); + + ret = pci_register_driver(&hinic_driver); + if (ret) + hinic_dbg_unregister_debugfs(); + + return ret; } static void __exit hinic_module_exit(void) From e2a54350dc9642e7dfc07335ca355581caa9dbfe Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 10 Nov 2022 13:49:38 +0800 Subject: [PATCH 191/963] net: phy: dp83867: Fix SGMII FIFO depth for non OF devices Current driver code will read device tree node information, and set default values if there is no info provided. This is not done in non-OF devices leading to SGMII fifo depths being set to the smallest size. This patch sets the value to the default value of the PHY as stated in the PHY datasheet. Fixes: 4dc08dcc9f6f ("net: phy: dp83867: introduce critical chip default init for non-of platform") Signed-off-by: Michael Sit Wei Hong Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221110054938.925347-1-michael.wei.hong.sit@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 417527f8bbf5..7446d5c6c714 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -682,6 +682,13 @@ static int dp83867_of_init(struct phy_device *phydev) */ dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN / 2; + /* For non-OF device, the RX and TX FIFO depths are taken from + * default value. So, we init RX & TX FIFO depths here + * so that it is configured correctly later in dp83867_config_init(); + */ + dp83867->tx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + dp83867->rx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB; + return 0; } #endif /* CONFIG_OF_MDIO */ From 77711683a50477de39757d67ab1a3638220d6860 Mon Sep 17 00:00:00 2001 From: Mohd Faizal Abdul Rahim Date: Thu, 10 Nov 2022 14:45:52 +0800 Subject: [PATCH 192/963] net: stmmac: ensure tx function is not running in stmmac_xdp_release() When stmmac_xdp_release() is called, there is a possibility that tx function is still running on other queues which will lead to tx queue timed out and reset adapter. This commit ensure that tx function is not running xdp before release flow continue to run. Fixes: ac746c8520d9 ("net: stmmac: enhance XDP ZC driver level switching performance") Signed-off-by: Song Yoong Siang Signed-off-by: Mohd Faizal Abdul Rahim Signed-off-by: Noor Azura Ahmad Tarmizi Link: https://lore.kernel.org/r/20221110064552.22504-1-noor.azura.ahmad.tarmizi@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8273e6a175c8..6b43da78cdf0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -6548,6 +6548,9 @@ void stmmac_xdp_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; + /* Ensure tx function is not running */ + netif_tx_disable(dev); + /* Disable NAPI process */ stmmac_disable_all_queues(priv); From 0834ced65a6a1eaa10d0b319b685879a671b29aa Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Thu, 10 Nov 2022 17:03:29 +0800 Subject: [PATCH 193/963] net/tls: Fix memory leak in tls_enc_skb() and tls_sw_fallback_init() 'aead_req' and 'aead_send' is allocated but not freed in default switch case. This commit fixes the potential memory leak by freeing them under the situation. Note that the default cases here should never be reached as they'd mean we allowed offloading an unsupported algorithm. Fixes: ea7a9d88ba21 ("net/tls: Use cipher sizes structs") Signed-off-by: Yu Liao Reviewed-by: Gal Pressman Link: https://lore.kernel.org/r/20221110090329.2036382-1-liaoyu15@huawei.com Signed-off-by: Jakub Kicinski --- net/tls/tls_device_fallback.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index cdb391a8754b..7fbb1d0b69b3 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -346,7 +346,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, salt = tls_ctx->crypto_send.aes_gcm_256.salt; break; default: - return NULL; + goto free_req; } cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE + @@ -492,7 +492,8 @@ int tls_sw_fallback_init(struct sock *sk, key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key; break; default: - return -EINVAL; + rc = -EINVAL; + goto free_aead; } cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; From 9cbd48d5fa14e4c65f8580de16686077f7cea02b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Thu, 10 Nov 2022 13:31:35 +0800 Subject: [PATCH 194/963] mctp i2c: don't count unused / invalid keys for flow release We're currently hitting the WARN_ON in mctp_i2c_flow_release: if (midev->release_count > midev->i2c_lock_count) { WARN_ONCE(1, "release count overflow"); This may be hit if we expire a flow before sending the first packet it contains - as we will not be pairing the increment of release_count (performed on flow release) with the i2c lock operation (only performed on actual TX). To fix this, only release a flow if we've encountered it previously (ie, dev_flow_state does not indicate NEW), as we will mark the flow as ACTIVE at the same time as accounting for the i2c lock operation. We also need to add an INVALID flow state, to indicate when we've done the release. Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver") Reported-by: Jian Zhang Tested-by: Jian Zhang Signed-off-by: Jeremy Kerr Link: https://lore.kernel.org/r/20221110053135.329071-1-jk@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i2c.c | 47 +++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index 0762c735dd8a..1d67a3ca1fd1 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -43,6 +43,7 @@ enum { MCTP_I2C_FLOW_STATE_NEW = 0, MCTP_I2C_FLOW_STATE_ACTIVE, + MCTP_I2C_FLOW_STATE_INVALID, }; /* List of all struct mctp_i2c_client @@ -374,12 +375,18 @@ mctp_i2c_get_tx_flow_state(struct mctp_i2c_dev *midev, struct sk_buff *skb) */ if (!key->valid) { state = MCTP_I2C_TX_FLOW_INVALID; - - } else if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_NEW) { - key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; - state = MCTP_I2C_TX_FLOW_NEW; } else { - state = MCTP_I2C_TX_FLOW_EXISTING; + switch (key->dev_flow_state) { + case MCTP_I2C_FLOW_STATE_NEW: + key->dev_flow_state = MCTP_I2C_FLOW_STATE_ACTIVE; + state = MCTP_I2C_TX_FLOW_NEW; + break; + case MCTP_I2C_FLOW_STATE_ACTIVE: + state = MCTP_I2C_TX_FLOW_EXISTING; + break; + default: + state = MCTP_I2C_TX_FLOW_INVALID; + } } spin_unlock_irqrestore(&key->lock, flags); @@ -617,21 +624,31 @@ static void mctp_i2c_release_flow(struct mctp_dev *mdev, { struct mctp_i2c_dev *midev = netdev_priv(mdev->dev); + bool queue_release = false; unsigned long flags; spin_lock_irqsave(&midev->lock, flags); - midev->release_count++; + /* if we have seen the flow/key previously, we need to pair the + * original lock with a release + */ + if (key->dev_flow_state == MCTP_I2C_FLOW_STATE_ACTIVE) { + midev->release_count++; + queue_release = true; + } + key->dev_flow_state = MCTP_I2C_FLOW_STATE_INVALID; spin_unlock_irqrestore(&midev->lock, flags); - /* Ensure we have a release operation queued, through the fake - * marker skb - */ - spin_lock(&midev->tx_queue.lock); - if (!midev->unlock_marker.next) - __skb_queue_tail(&midev->tx_queue, &midev->unlock_marker); - spin_unlock(&midev->tx_queue.lock); - - wake_up(&midev->tx_wq); + if (queue_release) { + /* Ensure we have a release operation queued, through the fake + * marker skb + */ + spin_lock(&midev->tx_queue.lock); + if (!midev->unlock_marker.next) + __skb_queue_tail(&midev->tx_queue, + &midev->unlock_marker); + spin_unlock(&midev->tx_queue.lock); + wake_up(&midev->tx_wq); + } } static const struct net_device_ops mctp_i2c_ops = { From c234ba8042920fa83635808dc5673f36869ca280 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 4 Nov 2022 15:29:53 -0700 Subject: [PATCH 195/963] PCI: hv: Only reuse existing IRTE allocation for Multi-MSI Jeffrey added Multi-MSI support to the pci-hyperv driver by the 4 patches: 08e61e861a0e ("PCI: hv: Fix multi-MSI to allow more than one MSI vector") 455880dfe292 ("PCI: hv: Fix hv_arch_irq_unmask() for multi-MSI") b4b77778ecc5 ("PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()") a2bad844a67b ("PCI: hv: Fix interrupt mapping for multi-MSI") It turns out that the third patch (b4b77778ecc5) causes a performance regression because all the interrupts now happen on 1 physical CPU (or two pCPUs, if one pCPU doesn't have enough vectors). When a guest has many PCI devices, it may suffer from soft lockups if the workload is heavy, e.g., see https://lwn.net/ml/linux-kernel/20220804025104.15673-1-decui@microsoft.com/ Commit b4b77778ecc5 itself is good. The real issue is that the hypercall in hv_irq_unmask() -> hv_arch_irq_unmask() -> hv_do_hypercall(HVCALL_RETARGET_INTERRUPT...) only changes the target virtual CPU rather than physical CPU; with b4b77778ecc5, the pCPU is determined only once in hv_compose_msi_msg() where only vCPU0 is specified; consequently the hypervisor only uses 1 target pCPU for all the interrupts. Note: before b4b77778ecc5, the pCPU is determined twice, and when the pCPU is determined the second time, the vCPU in the effective affinity mask is used (i.e., it isn't always vCPU0), so the hypervisor chooses different pCPU for each interrupt. The hypercall will be fixed in future to update the pCPU as well, but that will take quite a while, so let's restore the old behavior in hv_compose_msi_msg(), i.e., don't reuse the existing IRTE allocation for single-MSI and MSI-X; for multi-MSI, we choose the vCPU in a round-robin manner for each PCI device, so the interrupts of different devices can happen on different pCPUs, though the interrupts of each device happen on some single pCPU. The hypercall fix may not be backported to all old versions of Hyper-V, so we want to have this guest side change forever (or at least till we're sure the old affected versions of Hyper-V are no longer supported). Fixes: b4b77778ecc5 ("PCI: hv: Reuse existing IRTE allocation in compose_msi_msg()") Co-developed-by: Jeffrey Hugo Signed-off-by: Jeffrey Hugo Co-developed-by: Carl Vanderlip Signed-off-by: Carl Vanderlip Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221104222953.11356-1-decui@microsoft.com Signed-off-by: Wei Liu --- drivers/pci/controller/pci-hyperv.c | 90 ++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 15 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..f1ec8931dfbc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp, } static u32 hv_compose_msi_req_v1( - struct pci_create_interrupt *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt *int_pkt, u32 slot, u8 vector, u16 vector_count) { int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE; @@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1( return sizeof(*int_pkt); } +/* + * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and + * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be + * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V + * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is + * not irrelevant because Hyper-V chooses the physical CPU to handle the + * interrupts based on the vCPU specified in message sent to the vPCI VSP in + * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest, + * but assigning too many vPCI device interrupts to the same pCPU can cause a + * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V + * to spread out the pCPUs that it selects. + * + * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu() + * to always return the same dummy vCPU, because a second call to + * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a + * new pCPU for the interrupt. But for the multi-MSI case, the second call to + * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the + * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that + * the pCPUs are spread out. All interrupts for a multi-MSI device end up using + * the same pCPU, even though the vCPUs will be spread out by later calls + * to hv_irq_unmask(), but that is the best we can do now. + * + * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not* + * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an + * enhancement is planned for a future version. With that enhancement, the + * dummy vCPU selection won't matter, and interrupts for the same multi-MSI + * device will be spread across multiple pCPUs. + */ + /* * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten * by subsequent retarget in hv_irq_unmask(). @@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity) return cpumask_first_and(affinity, cpu_online_mask); } -static u32 hv_compose_msi_req_v2( - struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity, - u32 slot, u8 vector, u16 vector_count) +/* + * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0. + */ +static int hv_compose_multi_msi_req_get_cpu(void) { + static DEFINE_SPINLOCK(multi_msi_cpu_lock); + + /* -1 means starting with CPU 0 */ + static int cpu_next = -1; + + unsigned long flags; int cpu; + spin_lock_irqsave(&multi_msi_cpu_lock, flags); + + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, + false); + cpu = cpu_next; + + spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); + + return cpu; +} + +static u32 hv_compose_msi_req_v2( + struct pci_create_interrupt2 *int_pkt, int cpu, + u32 slot, u8 vector, u16 vector_count) +{ int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2( } static u32 hv_compose_msi_req_v3( - struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity, + struct pci_create_interrupt3 *int_pkt, int cpu, u32 slot, u32 vector, u16 vector_count) { - int cpu; - int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3; int_pkt->wslot.slot = slot; int_pkt->int_desc.vector = vector; int_pkt->int_desc.reserved = 0; int_pkt->int_desc.vector_count = vector_count; int_pkt->int_desc.delivery_mode = DELIVERY_MODE; - cpu = hv_compose_msi_req_get_cpu(affinity); int_pkt->int_desc.processor_array[0] = hv_cpu_number_to_vp_number(cpu); int_pkt->int_desc.processor_count = 1; @@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct pci_create_interrupt3 v3; } int_pkts; } __packed ctxt; + bool multi_msi; u64 trans_id; u32 size; int ret; + int cpu; + + msi_desc = irq_data_get_msi_desc(data); + multi_msi = !msi_desc->pci.msi_attrib.is_msix && + msi_desc->nvec_used > 1; /* Reuse the previous allocation */ - if (data->chip_data) { + if (data->chip_data && multi_msi) { int_desc = data->chip_data; msg->address_hi = int_desc->address >> 32; msg->address_lo = int_desc->address & 0xffffffff; @@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return; } - msi_desc = irq_data_get_msi_desc(data); pdev = msi_desc_to_pci_dev(msi_desc); dest = irq_data_get_effective_affinity_mask(data); pbus = pdev->bus; @@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (!hpdev) goto return_null_message; + /* Free any previous message that might have already been composed. */ + if (data->chip_data && !multi_msi) { + int_desc = data->chip_data; + data->chip_data = NULL; + hv_int_desc_free(hpdev, int_desc); + } + int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC); if (!int_desc) goto drop_reference; - if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) { + if (multi_msi) { /* * If this is not the first MSI of Multi MSI, we already have * a mapping. Can exit early. @@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) */ vector = 32; vector_count = msi_desc->nvec_used; + cpu = hv_compose_multi_msi_req_get_cpu(); } else { vector = hv_msi_get_int_vector(data); vector_count = 1; + cpu = hv_compose_msi_req_get_cpu(dest); } /* @@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) switch (hbus->protocol_version) { case PCI_PROTOCOL_VERSION_1_1: size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1, - dest, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_2: case PCI_PROTOCOL_VERSION_1_3: size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2, - dest, + cpu, hpdev->desc.win_slot.slot, (u8)vector, vector_count); @@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) case PCI_PROTOCOL_VERSION_1_4: size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3, - dest, + cpu, hpdev->desc.win_slot.slot, vector, vector_count); From ad72c3c3f6eb81d2cb189ec71e888316adada5df Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 12 Nov 2022 15:12:23 +0100 Subject: [PATCH 196/963] ALSA: usb-audio: Drop snd_BUG_ON() from snd_usbmidi_output_open() snd_usbmidi_output_open() has a check of the NULL port with snd_BUG_ON(). snd_BUG_ON() was used as this shouldn't have happened, but in reality, the NULL port may be seen when the device gives an invalid endpoint setup at the descriptor, hence the driver skips the allocation. That is, the check itself is valid and snd_BUG_ON() should be dropped from there. Otherwise it's confusing as if it were a real bug, as recently syzbot stumbled on it. Reported-by: syzbot+9abda841d636d86c41da@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/syzbot+9abda841d636d86c41da@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20221112141223.6144-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index bbff0923d264..2839f6b6f09b 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1133,10 +1133,8 @@ static int snd_usbmidi_output_open(struct snd_rawmidi_substream *substream) port = &umidi->endpoints[i].out->ports[j]; break; } - if (!port) { - snd_BUG(); + if (!port) return -ENXIO; - } substream->runtime->private_data = port; port->state = STATE_UNKNOWN; From 58143c1ed5882c138a3cd2251a336fc8755f23d9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Nov 2022 15:19:46 +0000 Subject: [PATCH 197/963] iio: health: afe4403: Fix oob read in afe4403_read_raw KASAN report out-of-bounds read as follows: BUG: KASAN: global-out-of-bounds in afe4403_read_raw+0x42e/0x4c0 Read of size 4 at addr ffffffffc02ac638 by task cat/279 Call Trace: afe4403_read_raw iio_read_channel_info dev_attr_show The buggy address belongs to the variable: afe4403_channel_leds+0x18/0xffffffffffffe9e0 This issue can be reproduced by singe command: $ cat /sys/bus/spi/devices/spi0.0/iio\:device0/in_intensity6_raw The array size of afe4403_channel_leds is less than channels, so access with chan->address cause OOB read in afe4403_read_raw. Fix it by moving access before use it. Fixes: b36e8257641a ("iio: health/afe440x: Use regmap fields") Signed-off-by: Wei Yongjun Acked-by: Andrew Davis Link: https://lore.kernel.org/r/20221107151946.89260-1-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/health/afe4403.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/health/afe4403.c b/drivers/iio/health/afe4403.c index 3bb4028c5d74..df3bc5c3d378 100644 --- a/drivers/iio/health/afe4403.c +++ b/drivers/iio/health/afe4403.c @@ -245,14 +245,14 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4403_data *afe = iio_priv(indio_dev); - unsigned int reg = afe4403_channel_values[chan->address]; - unsigned int field = afe4403_channel_leds[chan->address]; + unsigned int reg, field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + reg = afe4403_channel_values[chan->address]; ret = afe4403_read(afe, reg, val); if (ret) return ret; @@ -262,6 +262,7 @@ static int afe4403_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + field = afe4403_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[field], val); if (ret) return ret; From fc92d9e3de0b2d30a3ccc08048a5fad533e4672b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Nov 2022 15:20:10 +0000 Subject: [PATCH 198/963] iio: health: afe4404: Fix oob read in afe4404_[read|write]_raw KASAN report out-of-bounds read as follows: BUG: KASAN: global-out-of-bounds in afe4404_read_raw+0x2ce/0x380 Read of size 4 at addr ffffffffc00e4658 by task cat/278 Call Trace: afe4404_read_raw iio_read_channel_info dev_attr_show The buggy address belongs to the variable: afe4404_channel_leds+0x18/0xffffffffffffe9c0 This issue can be reproduce by singe command: $ cat /sys/bus/i2c/devices/0-0058/iio\:device0/in_intensity6_raw The array size of afe4404_channel_leds and afe4404_channel_offdacs are less than channels, so access with chan->address cause OOB read in afe4404_[read|write]_raw. Fix it by moving access before use them. Fixes: b36e8257641a ("iio: health/afe440x: Use regmap fields") Signed-off-by: Wei Yongjun Acked-by: Andrew Davis Link: https://lore.kernel.org/r/20221107152010.95937-1-weiyongjun@huaweicloud.com Signed-off-by: Jonathan Cameron --- drivers/iio/health/afe4404.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c index 8fca787b2524..836da31b7e30 100644 --- a/drivers/iio/health/afe4404.c +++ b/drivers/iio/health/afe4404.c @@ -250,20 +250,20 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int value_reg = afe4404_channel_values[chan->address]; - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int value_reg, led_field, offdac_field; int ret; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_RAW: + value_reg = afe4404_channel_values[chan->address]; ret = regmap_read(afe->regmap, value_reg, val); if (ret) return ret; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; ret = regmap_field_read(afe->fields[offdac_field], val); if (ret) return ret; @@ -273,6 +273,7 @@ static int afe4404_read_raw(struct iio_dev *indio_dev, case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; ret = regmap_field_read(afe->fields[led_field], val); if (ret) return ret; @@ -295,19 +296,20 @@ static int afe4404_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct afe4404_data *afe = iio_priv(indio_dev); - unsigned int led_field = afe4404_channel_leds[chan->address]; - unsigned int offdac_field = afe4404_channel_offdacs[chan->address]; + unsigned int led_field, offdac_field; switch (chan->type) { case IIO_INTENSITY: switch (mask) { case IIO_CHAN_INFO_OFFSET: + offdac_field = afe4404_channel_offdacs[chan->address]; return regmap_field_write(afe->fields[offdac_field], val); } break; case IIO_CURRENT: switch (mask) { case IIO_CHAN_INFO_RAW: + led_field = afe4404_channel_leds[chan->address]; return regmap_field_write(afe->fields[led_field], val); } break; From 6ac12303572ef9ace5603c2c07f5f1b00a33f580 Mon Sep 17 00:00:00 2001 From: Paul Gazzillo Date: Thu, 10 Nov 2022 16:47:29 -0500 Subject: [PATCH 199/963] iio: light: rpr0521: add missing Kconfig dependencies Fix an implicit declaration of function error for rpr0521 under some configs When CONFIG_RPR0521 is enabled without CONFIG_IIO_TRIGGERED_BUFFER, the build results in "implicit declaration of function" errors, e.g., drivers/iio/light/rpr0521.c:434:3: error: implicit declaration of function 'iio_trigger_poll_chained' [-Werror=implicit-function-declaration] 434 | iio_trigger_poll_chained(data->drdy_trigger0); | ^~~~~~~~~~~~~~~~~~~~~~~~ This fix adds select dependencies to RPR0521's configuration declaration. Fixes: e12ffd241c00 ("iio: light: rpr0521 triggered buffer") Signed-off-by: Paul Gazzillo Link: https://bugzilla.kernel.org/show_bug.cgi?id=216678 Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221110214729.ls5ixav5kxpeftk7@device Signed-off-by: Jonathan Cameron --- drivers/iio/light/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 7cf6e8490123..0d4447df7200 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -293,6 +293,8 @@ config RPR0521 tristate "ROHM RPR0521 ALS and proximity sensor driver" depends on I2C select REGMAP_I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here if you want to build support for ROHM's RPR0521 ambient light and proximity sensor device. From 20690cd50e68c0313472c7539460168b8ea6444d Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Thu, 10 Nov 2022 09:07:26 +0800 Subject: [PATCH 200/963] iio: accel: bma400: Fix memory leak in bma400_get_steps_reg() When regmap_bulk_read() fails, it does not free steps_raw, which will cause a memory leak issue, this patch fixes it. Fixes: d221de60eee3 ("iio: accel: bma400: Add separate channel for step counter") Signed-off-by: Dong Chenchen Reviewed-by: Jagath Jog J Link: https://lore.kernel.org/r/20221110010726.235601-1-dongchenchen2@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bma400_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index 490c342ef72a..6e4d10a7cd32 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -805,8 +805,10 @@ static int bma400_get_steps_reg(struct bma400_data *data, int *val) ret = regmap_bulk_read(data->regmap, BMA400_STEP_CNT0_REG, steps_raw, BMA400_STEP_RAW_LEN); - if (ret) + if (ret) { + kfree(steps_raw); return ret; + } *val = get_unaligned_le24(steps_raw); kfree(steps_raw); return IIO_VAL_INT; From 4ad09d956f8eacff61e67e5b13ba8ebec3232f76 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 8 Nov 2022 11:28:02 +0800 Subject: [PATCH 201/963] iio: core: Fix entry not deleted when iio_register_sw_trigger_type() fails In iio_register_sw_trigger_type(), configfs_register_default_group() is possible to fail, but the entry add to iio_trigger_types_list is not deleted. This leaves wild in iio_trigger_types_list, which can cause page fault when module is loading again. So fix this by list_del(&t->list) in error path. BUG: unable to handle page fault for address: fffffbfff81d7400 Call Trace: iio_register_sw_trigger_type do_one_initcall do_init_module load_module ... Fixes: b662f809d410 ("iio: core: Introduce IIO software triggers") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221108032802.168623-1-chenzhongjin@huawei.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-sw-trigger.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iio/industrialio-sw-trigger.c b/drivers/iio/industrialio-sw-trigger.c index 994f03a71520..d86a3305d9e8 100644 --- a/drivers/iio/industrialio-sw-trigger.c +++ b/drivers/iio/industrialio-sw-trigger.c @@ -58,8 +58,12 @@ int iio_register_sw_trigger_type(struct iio_sw_trigger_type *t) t->group = configfs_register_default_group(iio_triggers_group, t->name, &iio_trigger_type_group_type); - if (IS_ERR(t->group)) + if (IS_ERR(t->group)) { + mutex_lock(&iio_trigger_types_lock); + list_del(&t->list); + mutex_unlock(&iio_trigger_types_lock); ret = PTR_ERR(t->group); + } return ret; } From 145900cf91c4b32ac05dbc8675a0c7f4a278749d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 9 Nov 2022 11:22:50 +0000 Subject: [PATCH 202/963] i2c: npcm7xx: Fix error handling in npcm_i2c_init() A problem about i2c-npcm7xx create debugfs failed is triggered with the following log given: [ 173.827310] debugfs: Directory 'npcm_i2c' with parent '/' already present! The reason is that npcm_i2c_init() returns platform_driver_register() directly without checking its return value, if platform_driver_register() failed, it returns without destroy the newly created debugfs, resulting the debugfs of npcm_i2c can never be created later. npcm_i2c_init() debugfs_create_dir() # create debugfs directory platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when platform_driver_register() returns error. Fixes: 56a1485b102e ("i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver") Signed-off-by: Yuan Can Reviewed-by: Tali Perry Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..83457359ec45 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); From 562105c1b072411c71ac2202410d83ee79297624 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 7 Nov 2022 00:15:13 +0800 Subject: [PATCH 203/963] arm64: dts: rockchip: Fix Pine64 Quartz4-B PMIC interrupt Ths PMIC's interrupt line is tied to GPIO0_A3. This is described correctly for the pinmux setting, but incorrectly for the interrupt. Correct the interrupt setting so that interrupts from the PMIC get delivered. Fixes: dcc8c66bef79 ("arm64: dts: rockchip: add Pine64 Quartz64-B device tree") Signed-off-by: Chen-Yu Tsai Reviewed-by: Peter Geis Link: https://lore.kernel.org/r/20221106161513.4140-1-wens@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index 77b179cd20e7..b276eb0810c7 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -246,7 +246,7 @@ rk809: pmic@20 { compatible = "rockchip,rk809"; reg = <0x20>; interrupt-parent = <&gpio0>; - interrupts = ; + interrupts = ; assigned-clocks = <&cru I2S1_MCLKOUT_TX>; assigned-clock-parents = <&cru CLK_I2S1_8CH_TX>; clock-names = "mclk"; From 836fb30949d9edf91d7de696a884ceeae7e426d2 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 28 Oct 2022 12:14:18 +0800 Subject: [PATCH 204/963] soc: imx8m: Enable OCOTP clock before reading the register Commit 7d981405d0fd ("soc: imx8m: change to use platform driver") ever removed the dependency on bootloader for enabling OCOTP clock. It helped to fix a kexec kernel hang issue. But unfortunately it caused a regression on CAAM driver and got reverted. This is the second try to enable the OCOTP clock by directly calling clock API instead of indirectly enabling the clock via nvmem API. Fixes: ac34de14ac30 ("Revert "soc: imx8m: change to use platform driver"") Signed-off-by: Xiaolei Wang Reviewed-by: Lucas Stach Signed-off-by: Shawn Guo --- drivers/soc/imx/soc-imx8m.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index cc57a384d74d..28144c699b0c 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -11,6 +11,7 @@ #include #include #include +#include #define REV_B1 0x21 @@ -56,6 +57,7 @@ static u32 __init imx8mq_soc_revision(void) void __iomem *ocotp_base; u32 magic; u32 rev; + struct clk *clk; np = of_find_compatible_node(NULL, NULL, "fsl,imx8mq-ocotp"); if (!np) @@ -63,6 +65,13 @@ static u32 __init imx8mq_soc_revision(void) ocotp_base = of_iomap(np, 0); WARN_ON(!ocotp_base); + clk = of_clk_get_by_name(np, NULL); + if (!clk) { + WARN_ON(!clk); + return 0; + } + + clk_prepare_enable(clk); /* * SOC revision on older imx8mq is not available in fuses so query @@ -79,6 +88,8 @@ static u32 __init imx8mq_soc_revision(void) soc_uid <<= 32; soc_uid |= readl_relaxed(ocotp_base + OCOTP_UID_LOW); + clk_disable_unprepare(clk); + clk_put(clk); iounmap(ocotp_base); of_node_put(np); From 448dca8c88755b768552e19bd1618be34ef6d1ff Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 2 Nov 2022 09:06:35 -0400 Subject: [PATCH 205/963] rseq: Use pr_warn_once() when deprecated/unknown ABI flags are encountered These commits use WARN_ON_ONCE() and kill the offending processes when deprecated and unknown flags are encountered: commit c17a6ff93213 ("rseq: Kill process when unknown flags are encountered in ABI structures") commit 0190e4198e47 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_* flags") The WARN_ON_ONCE() triggered by userspace input prevents use of Syzkaller to fuzz the rseq system call. Replace this WARN_ON_ONCE() by pr_warn_once() messages which contain actually useful information. Reported-by: Mark Rutland Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Acked-by: Paul E. McKenney Link: https://lkml.kernel.org/r/20221102130635.7379-1-mathieu.desnoyers@efficios.com --- kernel/rseq.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index bda8175f8f99..d38ab944105d 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -171,12 +171,27 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) return 0; } +static bool rseq_warn_flags(const char *str, u32 flags) +{ + u32 test_flags; + + if (!flags) + return false; + test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS; + if (test_flags) + pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str); + test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS; + if (test_flags) + pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str); + return true; +} + static int rseq_need_restart(struct task_struct *t, u32 cs_flags) { u32 flags, event_mask; int ret; - if (WARN_ON_ONCE(cs_flags & RSEQ_CS_NO_RESTART_FLAGS) || cs_flags) + if (rseq_warn_flags("rseq_cs", cs_flags)) return -EINVAL; /* Get thread flags. */ @@ -184,7 +199,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) if (ret) return ret; - if (WARN_ON_ONCE(flags & RSEQ_CS_NO_RESTART_FLAGS) || flags) + if (rseq_warn_flags("rseq", flags)) return -EINVAL; /* From 91dabf33ae5df271da63e87ad7833e5fdb4a44b9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 26 Oct 2022 13:43:00 +0200 Subject: [PATCH 206/963] sched: Fix race in task_call_func() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a very narrow race between schedule() and task_call_func(). CPU0 CPU1 __schedule() rq_lock(); prev_state = READ_ONCE(prev->__state); if (... && prev_state) { deactivate_tasl(rq, prev, ...) prev->on_rq = 0; task_call_func() raw_spin_lock_irqsave(p->pi_lock); state = READ_ONCE(p->__state); smp_rmb(); if (... || p->on_rq) // false!!! rq = __task_rq_lock() ret = func(); next = pick_next_task(); rq = context_switch(prev, next) prepare_lock_switch() spin_release(&__rq_lockp(rq)->dep_map...) So while the task is on it's way out, it still holds rq->lock for a little while, and right then task_call_func() comes in and figures it doesn't need rq->lock anymore (because the task is already dequeued -- but still running there) and then the __set_task_frozen() thing observes it's holding rq->lock and yells murder. Avoid this by waiting for p->on_cpu to get cleared, which guarantees the task is fully finished on the old CPU. ( While arguably the fixes tag is 'wrong' -- none of the previous task_call_func() users appears to care for this case. ) Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Reported-by: Ville Syrjälä Signed-off-by: Peter Zijlstra (Intel) Tested-by: Ville Syrjälä Link: https://lkml.kernel.org/r/Y1kdRNNfUeAU+FNl@hirez.programming.kicks-ass.net --- kernel/sched/core.c | 52 ++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cb2aa2b54c7a..daff72f00385 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4200,6 +4200,40 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) return success; } +static bool __task_needs_rq_lock(struct task_struct *p) +{ + unsigned int state = READ_ONCE(p->__state); + + /* + * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when + * the task is blocked. Make sure to check @state since ttwu() can drop + * locks at the end, see ttwu_queue_wakelist(). + */ + if (state == TASK_RUNNING || state == TASK_WAKING) + return true; + + /* + * Ensure we load p->on_rq after p->__state, otherwise it would be + * possible to, falsely, observe p->on_rq == 0. + * + * See try_to_wake_up() for a longer comment. + */ + smp_rmb(); + if (p->on_rq) + return true; + +#ifdef CONFIG_SMP + /* + * Ensure the task has finished __schedule() and will not be referenced + * anymore. Again, see try_to_wake_up() for a longer comment. + */ + smp_rmb(); + smp_cond_load_acquire(&p->on_cpu, !VAL); +#endif + + return false; +} + /** * task_call_func - Invoke a function on task in fixed state * @p: Process for which the function is to be invoked, can be @current. @@ -4217,28 +4251,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) int task_call_func(struct task_struct *p, task_call_f func, void *arg) { struct rq *rq = NULL; - unsigned int state; struct rq_flags rf; int ret; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); - state = READ_ONCE(p->__state); - - /* - * Ensure we load p->on_rq after p->__state, otherwise it would be - * possible to, falsely, observe p->on_rq == 0. - * - * See try_to_wake_up() for a longer comment. - */ - smp_rmb(); - - /* - * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when - * the task is blocked. Make sure to check @state since ttwu() can drop - * locks at the end, see ttwu_queue_wakelist(). - */ - if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) + if (__task_needs_rq_lock(p)) rq = __task_rq_lock(p, &rf); /* From 3a4e894f36f95c63048298aaefee6e3e1a7f6a68 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:39 -0700 Subject: [PATCH 207/963] MAINTAINERS: git://github.com -> https://github.com for ceph Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Ilya Dryomov --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 256f03904987..9afe57eea888 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4809,7 +4809,7 @@ R: Jeff Layton L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: include/linux/ceph/ F: include/linux/crush/ F: net/ceph/ @@ -4821,7 +4821,7 @@ R: Jeff Layton L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: Documentation/filesystems/ceph.rst F: fs/ceph/ @@ -17222,7 +17222,7 @@ R: Dongsheng Yang L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ -T: git git://github.com/ceph/ceph-client.git +T: git https://github.com/ceph/ceph-client.git F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h From f86a48667b91202d502d753c707e8576a6fe265b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:36:53 +0300 Subject: [PATCH 208/963] ceph: fix a NULL vs IS_ERR() check when calling ceph_lookup_inode() The ceph_lookup_inode() function returns error pointers. It never returns NULL. Fixes: aa87052dd965 ("ceph: fix incorrectly showing the .snap size for stat") Signed-off-by: Dan Carpenter Reviewed-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 4af5e55abc15..bad9eeb6a1a5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2492,7 +2492,7 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path, struct inode *parent; parent = ceph_lookup_inode(sb, ceph_ino(inode)); - if (!parent) + if (IS_ERR(parent)) return PTR_ERR(parent); pci = ceph_inode(parent); From 51884d153f7ec85e18d607b2467820a90e0f4359 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 9 Nov 2022 11:00:39 +0800 Subject: [PATCH 209/963] ceph: avoid putting the realm twice when decoding snaps fails When decoding the snaps fails it maybe leaving the 'first_realm' and 'realm' pointing to the same snaprealm memory. And then it'll put it twice and could cause random use-after-free, BUG_ON, etc issues. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/57686 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 864cdaa0d2bd..e4151852184e 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -763,7 +763,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, struct ceph_mds_snap_realm *ri; /* encoded */ __le64 *snaps; /* encoded */ __le64 *prior_parent_snaps; /* encoded */ - struct ceph_snap_realm *realm = NULL; + struct ceph_snap_realm *realm; struct ceph_snap_realm *first_realm = NULL; struct ceph_snap_realm *realm_to_rebuild = NULL; int rebuild_snapcs; @@ -774,6 +774,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, dout("%s deletion=%d\n", __func__, deletion); more: + realm = NULL; rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; From 5bd76b8de5b74fa941a6eafee87728a0fe072267 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 10 Nov 2022 21:01:59 +0800 Subject: [PATCH 210/963] ceph: fix NULL pointer dereference for req->r_session The request's r_session maybe changed when it was forwarded or resent. Both the forwarding and resending cases the requests will be protected by the mdsc->mutex. Cc: stable@vger.kernel.org Link: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 Signed-off-by: Xiubo Li Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index fb023f9fafcb..e54814d0c2f7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2248,7 +2248,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; - unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2266,28 +2265,24 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + if (req1 || req2) { struct ceph_mds_request *req; + struct ceph_mds_session **sessions; + struct ceph_mds_session *s; + unsigned int max_sessions; int i; + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { + mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } @@ -2299,16 +2294,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2321,16 +2306,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) s = req->r_session; if (!s) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2342,11 +2317,12 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); + s = ci->i_auth_cap->session; + if (!sessions[s->s_mds]) + sessions[s->s_mds] = ceph_get_mds_session(s); } spin_unlock(&ci->i_ceph_lock); + mutex_unlock(&mdsc->mutex); /* send flush mdlog request to MDSes */ for (i = 0; i < max_sessions; i++) { From 4f68332b2f89ab84285e5b6cb3d30e8b9894bef1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 7 Nov 2022 16:54:13 +0000 Subject: [PATCH 211/963] drm/i915/ttm: fix uaf with lmem_userfault_list handling In the fault handler, make sure we check if the BO maps lmem after we schedule the migration, since the current resource might change from lmem to smem, if the pages are in the non-cpu visible portion of lmem. This then leads to adding the object to the lmem_userfault_list even though the current resource is no longer lmem. If we then destroy the object, the list might still contain a link to the now free object, since we only remove it if the object is still in lmem. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7469 Fixes: ad74457a6b5a ("drm/i915/dgfx: Release mmap on rpm suspend") Signed-off-by: Matthew Auld Cc: Anshuman Gupta Cc: Rodrigo Vivi Cc: Andrzej Hajda Cc: Nirmoy Das Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20221107165414.56970-1-matthew.auld@intel.com (cherry picked from commit 625b74460ec0978979f883fbee117e1b97e6e35e) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index a4aa9500fa17..3d4305eea1aa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -1013,9 +1013,6 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - if (i915_ttm_cpu_maps_iomem(bo->resource)) - wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); - if (!i915_ttm_resource_mappable(bo->resource)) { int err = -ENODEV; int i; @@ -1042,6 +1039,9 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf) } } + if (i915_ttm_cpu_maps_iomem(bo->resource)) + wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm); + if (drm_dev_enter(dev, &idx)) { ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot, TTM_BO_VM_NUM_PREFAULT); From 8979f428a4afc215e390006e5ea19fd4e22c7ca9 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 10 Nov 2022 18:30:37 +0800 Subject: [PATCH 212/963] net: liquidio: release resources when liquidio driver open failed When liquidio driver open failed, it doesn't release resources. Compile tested only. Fixes: 5b07aee11227 ("liquidio: MSIX support for CN23XX") Fixes: dbc97bfd3918 ("net: liquidio: Add missing null pointer checks") Signed-off-by: Zhengchao Shao Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index d312bd594935..75771825c3f9 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1794,13 +1794,10 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (OCTEON_CN23XX_PF(oct)) { - if (!oct->msix_on) - if (setup_tx_poll_fn(netdev)) - return -1; - } else { - if (setup_tx_poll_fn(netdev)) - return -1; + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + ret = setup_tx_poll_fn(netdev); + if (ret) + goto err_poll; } netif_tx_start_all_queues(netdev); @@ -1813,7 +1810,7 @@ static int liquidio_open(struct net_device *netdev) /* tell Octeon to start forwarding packets to host */ ret = send_rx_ctrl_cmd(lio, 1); if (ret) - return ret; + goto err_rx_ctrl; /* start periodical statistics fetch */ INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats); @@ -1824,6 +1821,27 @@ static int liquidio_open(struct net_device *netdev) dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); + return 0; + +err_rx_ctrl: + if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + cleanup_tx_poll_fn(netdev); +err_poll: + if (lio->ptp_clock) { + ptp_clock_unregister(lio->ptp_clock); + lio->ptp_clock = NULL; + } + + if (oct->props[lio->ifidx].napi_enabled == 1) { + list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) + napi_disable(napi); + + oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; + } + return ret; } From 2d25107e111a85c56f601a5470f1780ec054e6ac Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Thu, 10 Nov 2022 19:38:23 +0800 Subject: [PATCH 213/963] mISDN: fix misuse of put_device() in mISDN_register_device() We should not release reference by put_device() before calling device_initialize(). Fixes: e7d1d4d9ac0d ("mISDN: fix possible memory leak in mISDN_register_device()") Signed-off-by: Wang ShaoBo Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 7ea0100f218a..90ee56d07a6e 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -222,7 +222,7 @@ mISDN_register_device(struct mISDNdevice *dev, err = get_free_devid(); if (err < 0) - goto error1; + return err; dev->id = err; device_initialize(&dev->dev); From 5df1341ea822292275c56744aab9c536d75c33be Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Fri, 11 Nov 2022 09:41:30 +0800 Subject: [PATCH 214/963] net: macvlan: Use built-in RCU list checking hlist_for_each_entry_rcu() has built-in RCU and lock checking. Pass cond argument to hlist_for_each_entry_rcu() to silence false lockdep warning when CONFIG_PROVE_RCU_LIST is enabled. Execute as follow: ip link add link eth0 type macvlan mode source macaddr add The rtnl_lock is held when macvlan_hash_lookup_source() or macvlan_fill_info_macaddr() are called in the non-RCU read side section. So, pass lockdep_rtnl_is_held() to silence false lockdep warning. Fixes: 79cf79abce71 ("macvlan: add source mode") Signed-off-by: Chuang Wang Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 578897aaada0..b8cc55b2d721 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -141,7 +141,7 @@ static struct macvlan_source_entry *macvlan_hash_lookup_source( u32 idx = macvlan_eth_hash(addr); struct hlist_head *h = &vlan->port->vlan_source_hash[idx]; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (ether_addr_equal_64bits(entry->addr, addr) && entry->vlan == vlan) return entry; @@ -1647,7 +1647,7 @@ static int macvlan_fill_info_macaddr(struct sk_buff *skb, struct hlist_head *h = &vlan->port->vlan_source_hash[i]; struct macvlan_source_entry *entry; - hlist_for_each_entry_rcu(entry, h, hlist) { + hlist_for_each_entry_rcu(entry, h, hlist, lockdep_rtnl_is_held()) { if (entry->vlan != vlan) continue; if (nla_put(skb, IFLA_MACVLAN_MACADDR, ETH_ALEN, entry->addr)) From 8fbb53c8bfd8c56ecf1f78dc821778b58f505503 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 11 Nov 2022 09:47:34 +0800 Subject: [PATCH 215/963] net: caif: fix double disconnect client in chnl_net_open() When connecting to client timeout, disconnect client for twice in chnl_net_open(). Remove one. Compile tested only. Fixes: 2aa40aef9deb ("caif: Use link layer MTU instead of fixed MTU") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller --- net/caif/chnl_net.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 4d63ef13a1fd..f35fc87c453a 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -310,9 +310,6 @@ static int chnl_net_open(struct net_device *dev) if (result == 0) { pr_debug("connect timeout\n"); - caif_disconnect_client(dev_net(dev), &priv->chnl); - priv->state = CAIF_DISCONNECTED; - pr_debug("state disconnected\n"); result = -ETIMEDOUT; goto error; } From 991aef4ee4f6eb999924f429b943441a32835c8f Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 11 Nov 2022 15:04:33 +0800 Subject: [PATCH 216/963] bnxt_en: Remove debugfs when pci_register_driver failed When pci_register_driver failed, we need to remove debugfs, which will caused a resource leak, fix it. Resource leak logs as follows: [ 52.184456] debugfs: Directory 'bnxt_en' with parent '/' already present! Fixes: cabfb09d87bd ("bnxt_en: add debugfs support for DIM") Signed-off-by: Gaosheng Cui Reviewed-by: Leon Romanovsky Reviewed-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c78b6e9dea2c..9f8a6ce4b356 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -14037,8 +14037,16 @@ static struct pci_driver bnxt_pci_driver = { static int __init bnxt_init(void) { + int err; + bnxt_debug_init(); - return pci_register_driver(&bnxt_pci_driver); + err = pci_register_driver(&bnxt_pci_driver); + if (err) { + bnxt_debug_exit(); + return err; + } + + return 0; } static void __exit bnxt_exit(void) From 298b83e180d53a310f9b47e3bf13b7b583e75e9c Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:08:27 +0800 Subject: [PATCH 217/963] octeon_ep: delete unnecessary napi rollback under set_queues_err in octep_open() octep_napi_add() and octep_napi_enable() are all after netif_set_real_num_{tx,rx}_queues() in octep_open(), so it is unnecessary napi rollback under set_queues_err. Delete them to fix it. Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 9089adcb75f9..7985a748fafe 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -527,8 +527,6 @@ static int octep_open(struct net_device *netdev) return 0; set_queues_err: - octep_napi_disable(oct); - octep_napi_delete(oct); octep_clean_irqs(oct); setup_irq_err: octep_free_oqs(oct); From 9d3ff7131877fb092185c369fbb14b57ac4e7cec Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:08:47 +0800 Subject: [PATCH 218/963] octeon_ep: ensure octep_get_link_status() successfully before octep_link_up() octep_get_link_status() can fail because send mbox message failed, then octep_get_link_status() will return ret less than 0. Excute octep_link_up() as long as ret is not equal to 0 in octep_open() now. That is not correct. The value type of link.state is enum octep_ctrl_net_state. Positive value represents up. Excute octep_link_up() when ret is bigger than 0. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 7985a748fafe..546bcebf4462 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -521,7 +521,7 @@ static int octep_open(struct net_device *netdev) octep_oq_dbell_init(oct); ret = octep_get_link_status(oct); - if (ret) + if (ret > 0) octep_link_up(netdev); return 0; From e4041be97b15302ebfffda8bbd45f3b2d096048f Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:09:23 +0800 Subject: [PATCH 219/963] octeon_ep: fix potential memory leak in octep_device_setup() When occur unsupported_dev and mbox init errors, it did not free oct->conf and iounmap() oct->mmio[i].hw_addr. That would trigger memory leak problem. Add kfree() for oct->conf and iounmap() for oct->mmio[i].hw_addr under unsupported_dev and mbox init errors to fix the problem. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 546bcebf4462..53f288c32238 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -956,7 +956,7 @@ int octep_device_setup(struct octep_device *oct) ret = octep_ctrl_mbox_init(ctrl_mbox); if (ret) { dev_err(&pdev->dev, "Failed to initialize control mbox\n"); - return -1; + goto unsupported_dev; } oct->ctrl_mbox_ifstats_offset = OCTEP_CTRL_MBOX_SZ(ctrl_mbox->h2fq.elem_sz, ctrl_mbox->h2fq.elem_cnt, @@ -966,6 +966,10 @@ int octep_device_setup(struct octep_device *oct) return 0; unsupported_dev: + for (i = 0; i < OCTEP_MMIO_REGIONS; i++) + iounmap(oct->mmio[i].hw_addr); + + kfree(oct->conf); return -1; } From 848ffce2f0c93f3481052340a919123a21f808b6 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 11 Nov 2022 15:09:35 +0800 Subject: [PATCH 220/963] octeon_ep: ensure get mac address successfully before eth_hw_addr_set() octep_get_mac_addr() can fail because send mbox message failed. If this happens, octep_dev->mac_addr will be zero. It should not continue to initialize. Add exception handling for octep_get_mac_addr() to fix it. Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Ziyang Xuan Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 53f288c32238..b45dd7f04e21 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1072,7 +1072,11 @@ static int octep_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->max_mtu = OCTEP_MAX_MTU; netdev->mtu = OCTEP_DEFAULT_MTU; - octep_get_mac_addr(octep_dev, octep_dev->mac_addr); + err = octep_get_mac_addr(octep_dev, octep_dev->mac_addr); + if (err) { + dev_err(&pdev->dev, "Failed to get mac address\n"); + goto register_dev_err; + } eth_hw_addr_set(netdev, octep_dev->mac_addr); err = register_netdev(netdev); From e17a025a47c66ca8499ae88d8046c4f0d7c9c057 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Thu, 27 Oct 2022 09:32:00 +0200 Subject: [PATCH 221/963] drm/lima: Fix opp clkname setting in case of missing regulator Commit d8c32d3971e4 ("drm/lima: Migrate to dev_pm_opp_set_config()") introduced a regression as it may undo the clk_names setting in case the optional regulator is missing. This resulted in test and performance regressions with lima. Restore the old behavior where clk_names is set separately so it is not undone in case of a missing optional regulator. Fixes: d8c32d3971e4 ("drm/lima: Migrate to dev_pm_opp_set_config()") Acked-by: Viresh Kumar Signed-off-by: Erico Nunes Signed-off-by: Qiang Yu Link: https://patchwork.freedesktop.org/patch/msgid/20221027073200.3885839-1-nunes.erico@gmail.com --- drivers/gpu/drm/lima/lima_devfreq.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c index 011be7ff51e1..bc8fb4e38d0a 100644 --- a/drivers/gpu/drm/lima/lima_devfreq.c +++ b/drivers/gpu/drm/lima/lima_devfreq.c @@ -112,11 +112,6 @@ int lima_devfreq_init(struct lima_device *ldev) unsigned long cur_freq; int ret; const char *regulator_names[] = { "mali", NULL }; - const char *clk_names[] = { "core", NULL }; - struct dev_pm_opp_config config = { - .regulator_names = regulator_names, - .clk_names = clk_names, - }; if (!device_property_present(dev, "operating-points-v2")) /* Optional, continue without devfreq */ @@ -124,7 +119,15 @@ int lima_devfreq_init(struct lima_device *ldev) spin_lock_init(&ldevfreq->lock); - ret = devm_pm_opp_set_config(dev, &config); + /* + * clkname is set separately so it is not affected by the optional + * regulator setting which may return error. + */ + ret = devm_pm_opp_set_clkname(dev, "core"); + if (ret) + return ret; + + ret = devm_pm_opp_set_regulators(dev, regulator_names); if (ret) { /* Continue if the optional regulator is missing */ if (ret != -ENODEV) From f7c125bd79f50ec6094761090be81d02726ec6f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 11 Nov 2022 09:20:44 +0000 Subject: [PATCH 222/963] net: mhi: Fix memory leak in mhi_net_dellink() MHI driver registers network device without setting the needs_free_netdev flag, and does NOT call free_netdev() when unregisters network device, which causes a memory leak. This patch calls free_netdev() to fix it since netdev_priv is used after unregister. Fixes: 13adac032982 ("net: mhi_net: Register wwan_ops for link creation") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/mhi_net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 0b1b6f650104..0b9d37979133 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -343,6 +343,8 @@ static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) kfree_skb(mhi_netdev->skbagg_head); + free_netdev(ndev); + dev_set_drvdata(&mhi_dev->dev, NULL); } From ed1fe1bebe18884b11e5536b5ac42e3a48960835 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Nov 2022 23:10:20 +0200 Subject: [PATCH 223/963] net: dsa: make dsa_master_ioctl() see through port_hwtstamp_get() shims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are multi-generational drivers like mv88e6xxx which have code like this: int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port, struct ifreq *ifr) { if (!chip->info->ptp_support) return -EOPNOTSUPP; ... } DSA wants to deny PTP timestamping on the master if the switch supports timestamping too. However it currently relies on the presence of the port_hwtstamp_get() callback to determine PTP capability, and this clearly does not work in that case (method is present but returns -EOPNOTSUPP). We should not deny PTP on the DSA master for those switches which truly do not support hardware timestamping. Create a dsa_port_supports_hwtstamp() method which actually probes for support by calling port_hwtstamp_get() and seeing whether that returned -EOPNOTSUPP or not. Fixes: f685e609a301 ("net: dsa: Deny PTP on master if switch supports it") Link: https://patchwork.kernel.org/project/netdevbpf/patch/20221110124345.3901389-1-festevam@gmail.com/ Reported-by: Fabio Estevam Reported-by: Steffen Bätz Signed-off-by: Vladimir Oltean Tested-by: Fabio Estevam Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 + net/dsa/master.c | 3 +-- net/dsa/port.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 6e65c7ffd6f3..71e9707d11d4 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -210,6 +210,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, extern struct rtnl_link_ops dsa_link_ops __read_mostly; /* port.c */ +bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr); void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); diff --git a/net/dsa/master.c b/net/dsa/master.c index 40367ab41cf8..421de166515f 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -204,8 +204,7 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) * switch in the tree that is PTP capable. */ list_for_each_entry(dp, &dst->ports, list) - if (dp->ds->ops->port_hwtstamp_get || - dp->ds->ops->port_hwtstamp_set) + if (dsa_port_supports_hwtstamp(dp, ifr)) return -EBUSY; break; } diff --git a/net/dsa/port.c b/net/dsa/port.c index 208168276995..750fe68d9b2a 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -110,6 +110,22 @@ static bool dsa_port_can_configure_learning(struct dsa_port *dp) return !err; } +bool dsa_port_supports_hwtstamp(struct dsa_port *dp, struct ifreq *ifr) +{ + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_hwtstamp_get || !ds->ops->port_hwtstamp_set) + return false; + + /* "See through" shim implementations of the "get" method. + * This will clobber the ifreq structure, but we will either return an + * error, or the master will overwrite it with proper values. + */ + err = ds->ops->port_hwtstamp_get(ds, dp->index, ifr); + return err != -EOPNOTSUPP; +} + int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; From 2598ac6ec493566105f8a1029aa9de36ba6946ca Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 9 Nov 2022 19:28:31 +0000 Subject: [PATCH 224/963] arm64: ftrace: Define ftrace_stub_graph only with FUNCTION_GRAPH_TRACER The 0-day bot reports that arm64 builds with CONFIG_CFI_CLANG + CONFIG_FTRACE are broken when CONFIG_FUNCTION_GRAPH_TRACER is not enabled: ld.lld: error: undefined symbol: __kcfi_typeid_ftrace_stub_graph >>> referenced by entry-ftrace.S:299 (arch/arm64/kernel/entry-ftrace.S:299) >>> arch/arm64/kernel/entry-ftrace.o:(.text+0x48) in archive vmlinux.a This is caused by ftrace_stub_graph using SYM_TYPE_FUNC_START when the address of the function is not taken in any C translation unit. Fix the build by only defining ftrace_stub_graph when it's actually needed, i.e. with CONFIG_FUNCTION_GRAPH_TRACER. Link: https://lore.kernel.org/lkml/202210251659.tRMs78RH-lkp@intel.com/ Fixes: 883bbbffa5a4 ("ftrace,kcfi: Separate ftrace_stub() and ftrace_stub_graph()") Reported-by: kernel test robot Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Acked-by: Mark Rutland Reviewed-by: Masami Hiramatsu (Google) Link: https://lore.kernel.org/r/20221109192831.3057131-1-samitolvanen@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-ftrace.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 795344ab4ec4..322a831f8ede 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -299,11 +299,11 @@ SYM_TYPED_FUNC_START(ftrace_stub) ret SYM_FUNC_END(ftrace_stub) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_TYPED_FUNC_START(ftrace_stub_graph) ret SYM_FUNC_END(ftrace_stub_graph) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * From 639b2e2ff1e850eb4e8853b4dc233875108eec4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:14:16 +0100 Subject: [PATCH 225/963] x86/xen: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/e91af3c8708af38b1c57e0a2d7eb9765dda0e963.1667336095.git.christophe.jaillet@wanadoo.fr Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 3 ++- arch/x86/xen/setup.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 0ad3d4bf52b3..34b05ee1523f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -113,7 +114,7 @@ static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE); static int __init parse_xen_msr_safe(char *str) { if (str) - return strtobool(str, &xen_msr_safe); + return kstrtobool(str, &xen_msr_safe); return -EINVAL; } early_param("xen_msr_safe", parse_xen_msr_safe); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 4f4309500559..8db26f10fb1d 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -85,7 +86,7 @@ static void __init xen_parse_512gb(void) arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit="); if (!arg) val = true; - else if (strtobool(arg + strlen("xen_512gb_limit="), &val)) + else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val)) return; xen_512gb_limit = val; From da36a2a76b01b210ffaa55cdc2c99bc8783697c5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 23:24:41 +0800 Subject: [PATCH 226/963] xen/pcpu: fix possible memory leak in register_pcpu() In device_add(), dev_set_name() is called to allocate name, if it returns error, the name need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). Fixes: f65c9bb3fb72 ("xen/pcpu: Xen physical cpus online/offline sys interface") Signed-off-by: Yang Yingliang Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20221110152441.401630-1-yangyingliang@huawei.com Signed-off-by: Juergen Gross --- drivers/xen/pcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 47aa3a1ccaf5..fd3a644b0855 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -228,7 +228,7 @@ static int register_pcpu(struct pcpu *pcpu) err = device_register(dev); if (err) { - pcpu_release(dev); + put_device(dev); return err; } From 5e29500eba2aa19e1323df46f64dafcd4a327092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Mon, 14 Nov 2022 11:31:08 +0100 Subject: [PATCH 227/963] xen-pciback: Allow setting PCI_MSIX_FLAGS_MASKALL too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Xen domain configures MSI-X, the usual approach is to enable MSI-X together with masking all of them via the config space, then fill the table and only then clear PCI_MSIX_FLAGS_MASKALL. Allow doing this via QEMU running in a stub domain. Previously, when changing PCI_MSIX_FLAGS_MASKALL was not allowed, the whole write was aborted, preventing change to the PCI_MSIX_FLAGS_ENABLE bit too. Note the Xen hypervisor intercepts this write anyway, and may keep the PCI_MSIX_FLAGS_MASKALL bit set if it wishes to. It will store the guest-requested state and will apply it eventually. Signed-off-by: Marek Marczykowski-Górecki Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20221114103110.1519413-1-marmarek@invisiblethingslab.com Signed-off-by: Juergen Gross --- drivers/xen/xen-pciback/conf_space_capability.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 5e53b4817f16..097316a74126 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -190,13 +190,16 @@ static const struct config_field caplist_pm[] = { }; static struct msi_msix_field_config { - u16 enable_bit; /* bit for enabling MSI/MSI-X */ - unsigned int int_type; /* interrupt type for exclusiveness check */ + u16 enable_bit; /* bit for enabling MSI/MSI-X */ + u16 allowed_bits; /* bits allowed to be changed */ + unsigned int int_type; /* interrupt type for exclusiveness check */ } msi_field_config = { .enable_bit = PCI_MSI_FLAGS_ENABLE, + .allowed_bits = PCI_MSI_FLAGS_ENABLE, .int_type = INTERRUPT_TYPE_MSI, }, msix_field_config = { .enable_bit = PCI_MSIX_FLAGS_ENABLE, + .allowed_bits = PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL, .int_type = INTERRUPT_TYPE_MSIX, }; @@ -229,7 +232,7 @@ static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value, return 0; if (!dev_data->allow_interrupt_control || - (new_value ^ old_value) & ~field_config->enable_bit) + (new_value ^ old_value) & ~field_config->allowed_bits) return PCIBIOS_SET_FAILED; if (new_value & field_config->enable_bit) { From c53717e1e3f0d0f9129b2e0dbc6dcc5e0a8132e9 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Mon, 14 Nov 2022 19:21:24 +0800 Subject: [PATCH 228/963] xen/platform-pci: add missing free_irq() in error path free_irq() is missing in case of error in platform_pci_probe(), fix that. Signed-off-by: ruanjinjie Reviewed-by: Oleksandr Tyshchenko Link: https://lore.kernel.org/r/20221114112124.1965611-1-ruanjinjie@huawei.com Signed-off-by: Juergen Gross --- drivers/xen/platform-pci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 18f0ed8b1f93..6ebd819338ec 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -144,7 +144,7 @@ static int platform_pci_probe(struct pci_dev *pdev, if (ret) { dev_warn(&pdev->dev, "Unable to set the evtchn callback " "err=%d\n", ret); - goto out; + goto irq_out; } } @@ -152,13 +152,16 @@ static int platform_pci_probe(struct pci_dev *pdev, grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); ret = gnttab_setup_auto_xlat_frames(grant_frames); if (ret) - goto out; + goto irq_out; ret = gnttab_init(); if (ret) goto grant_out; return 0; grant_out: gnttab_free_auto_xlat_frames(); +irq_out: + if (!xen_have_vector_callback) + free_irq(pdev->irq, pdev); out: pci_release_region(pdev, 0); mem_out: From e1ff91f9d2303cd4e706cc908bfca21cd17b9927 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Fri, 11 Nov 2022 10:41:06 +0100 Subject: [PATCH 229/963] pinctrl: mediatek: Fix EINT pins input debounce time configuration The External Interrupt Controller (EINTC) on all of the supported MediaTek SoCs does support input debouncing, but not all of them index the debounce time values (DBNC_SETTING registers) the same way. Before this change, in some cases, as an example, requesting a debounce time of 16 milliseconds would mistakenly set the relative DBNC_SETTING register to 0x2, resulting in a way shorter debounce time of 500uS. To fix the aforementioned issue, define three different debounce_time arrays, reflecting the correct register index for each value and for each register index variant, and make sure that each SoC pinctrl driver uses the right one. Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221111094106.18486-1-angelogioacchino.delregno@collabora.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 31 +++++++++++++++++++---- drivers/pinctrl/mediatek/mtk-eint.h | 6 +++++ drivers/pinctrl/mediatek/pinctrl-mt2701.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt2712.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt6765.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt6779.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt6795.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt7622.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt7623.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt7629.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt7986.c | 2 ++ drivers/pinctrl/mediatek/pinctrl-mt8127.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8135.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8167.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8173.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8183.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8186.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8188.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8192.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8195.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8365.c | 1 + drivers/pinctrl/mediatek/pinctrl-mt8516.c | 1 + 22 files changed, 53 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index f7b54a551764..e94ee802c603 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -24,6 +24,7 @@ #define MTK_EINT_EDGE_SENSITIVE 0 #define MTK_EINT_LEVEL_SENSITIVE 1 #define MTK_EINT_DBNC_SET_DBNC_BITS 4 +#define MTK_EINT_DBNC_MAX 16 #define MTK_EINT_DBNC_RST_BIT (0x1 << 1) #define MTK_EINT_DBNC_SET_EN (0x1 << 0) @@ -48,6 +49,18 @@ static const struct mtk_eint_regs mtk_generic_eint_regs = { .dbnc_clr = 0x700, }; +const unsigned int debounce_time_mt2701[] = { + 500, 1000, 16000, 32000, 64000, 128000, 256000, 0 +}; + +const unsigned int debounce_time_mt6765[] = { + 125, 250, 500, 1000, 16000, 32000, 64000, 128000, 256000, 512000, 0 +}; + +const unsigned int debounce_time_mt6795[] = { + 500, 1000, 16000, 32000, 64000, 128000, 256000, 512000, 0 +}; + static void __iomem *mtk_eint_get_offset(struct mtk_eint *eint, unsigned int eint_num, unsigned int offset) @@ -404,10 +417,11 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, int virq, eint_offset; unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc; - static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, - 64000, 128000, 256000}; struct irq_data *d; + if (!eint->hw->db_time) + return -EOPNOTSUPP; + virq = irq_find_mapping(eint->domain, eint_num); eint_offset = (eint_num % 4) * 8; d = irq_get_irq_data(virq); @@ -418,9 +432,9 @@ int mtk_eint_set_debounce(struct mtk_eint *eint, unsigned long eint_num, if (!mtk_eint_can_en_debounce(eint, eint_num)) return -EINVAL; - dbnc = ARRAY_SIZE(debounce_time); - for (i = 0; i < ARRAY_SIZE(debounce_time); i++) { - if (debounce <= debounce_time[i]) { + dbnc = eint->num_db_time; + for (i = 0; i < eint->num_db_time; i++) { + if (debounce <= eint->hw->db_time[i]) { dbnc = i; break; } @@ -494,6 +508,13 @@ int mtk_eint_do_init(struct mtk_eint *eint) if (!eint->domain) return -ENOMEM; + if (eint->hw->db_time) { + for (i = 0; i < MTK_EINT_DBNC_MAX; i++) + if (eint->hw->db_time[i] == 0) + break; + eint->num_db_time = i; + } + mtk_eint_hw_init(eint); for (i = 0; i < eint->hw->ap_num; i++) { int virq = irq_create_mapping(eint->domain, i); diff --git a/drivers/pinctrl/mediatek/mtk-eint.h b/drivers/pinctrl/mediatek/mtk-eint.h index 48468d0fae68..6139b16cd225 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.h +++ b/drivers/pinctrl/mediatek/mtk-eint.h @@ -37,8 +37,13 @@ struct mtk_eint_hw { u8 ports; unsigned int ap_num; unsigned int db_cnt; + const unsigned int *db_time; }; +extern const unsigned int debounce_time_mt2701[]; +extern const unsigned int debounce_time_mt6765[]; +extern const unsigned int debounce_time_mt6795[]; + struct mtk_eint; struct mtk_eint_xt { @@ -62,6 +67,7 @@ struct mtk_eint { /* Used to fit into various EINT device */ const struct mtk_eint_hw *hw; const struct mtk_eint_regs *regs; + u16 num_db_time; /* Used to fit into various pinctrl device */ void *pctl; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt2701.c b/drivers/pinctrl/mediatek/pinctrl-mt2701.c index d1583b4fdd9d..b185538452a0 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt2701.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt2701.c @@ -518,6 +518,7 @@ static const struct mtk_pinctrl_devdata mt2701_pinctrl_data = { .ports = 6, .ap_num = 169, .db_cnt = 16, + .db_time = debounce_time_mt2701, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt2712.c b/drivers/pinctrl/mediatek/pinctrl-mt2712.c index b921068f9e69..730a496848dc 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt2712.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt2712.c @@ -567,6 +567,7 @@ static const struct mtk_pinctrl_devdata mt2712_pinctrl_data = { .ports = 8, .ap_num = 229, .db_cnt = 40, + .db_time = debounce_time_mt2701, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt6765.c b/drivers/pinctrl/mediatek/pinctrl-mt6765.c index c57b19fcda03..f6ec41eb6e0c 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt6765.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt6765.c @@ -1062,6 +1062,7 @@ static const struct mtk_eint_hw mt6765_eint_hw = { .ports = 6, .ap_num = 160, .db_cnt = 13, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt6765_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt6779.c b/drivers/pinctrl/mediatek/pinctrl-mt6779.c index 4ddf8bda6827..62d4f5ad6737 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt6779.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt6779.c @@ -737,6 +737,7 @@ static const struct mtk_eint_hw mt6779_eint_hw = { .ports = 6, .ap_num = 195, .db_cnt = 13, + .db_time = debounce_time_mt2701, }; static const struct mtk_pin_soc mt6779_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt6795.c b/drivers/pinctrl/mediatek/pinctrl-mt6795.c index f90152261a0f..01e855ccd4dd 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt6795.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt6795.c @@ -475,6 +475,7 @@ static const struct mtk_eint_hw mt6795_eint_hw = { .ports = 7, .ap_num = 224, .db_cnt = 32, + .db_time = debounce_time_mt6795, }; static const unsigned int mt6795_pull_type[] = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7622.c b/drivers/pinctrl/mediatek/pinctrl-mt7622.c index 68eee881ee3d..3c1148d59eff 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt7622.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt7622.c @@ -846,6 +846,7 @@ static const struct mtk_eint_hw mt7622_eint_hw = { .ports = 7, .ap_num = ARRAY_SIZE(mt7622_pins), .db_cnt = 20, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt7622_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7623.c b/drivers/pinctrl/mediatek/pinctrl-mt7623.c index b8d9d31db74f..699977074697 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt7623.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt7623.c @@ -1369,6 +1369,7 @@ static const struct mtk_eint_hw mt7623_eint_hw = { .ports = 6, .ap_num = 169, .db_cnt = 20, + .db_time = debounce_time_mt2701, }; static struct mtk_pin_soc mt7623_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7629.c b/drivers/pinctrl/mediatek/pinctrl-mt7629.c index b5f0fa43245f..2ce411cb9c6e 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt7629.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt7629.c @@ -402,6 +402,7 @@ static const struct mtk_eint_hw mt7629_eint_hw = { .ports = 7, .ap_num = ARRAY_SIZE(mt7629_pins), .db_cnt = 16, + .db_time = debounce_time_mt2701, }; static struct mtk_pin_soc mt7629_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7986.c b/drivers/pinctrl/mediatek/pinctrl-mt7986.c index f26869f1a367..50cb736f9f11 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt7986.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt7986.c @@ -826,6 +826,7 @@ static const struct mtk_eint_hw mt7986a_eint_hw = { .ports = 7, .ap_num = ARRAY_SIZE(mt7986a_pins), .db_cnt = 16, + .db_time = debounce_time_mt6765, }; static const struct mtk_eint_hw mt7986b_eint_hw = { @@ -833,6 +834,7 @@ static const struct mtk_eint_hw mt7986b_eint_hw = { .ports = 7, .ap_num = ARRAY_SIZE(mt7986b_pins), .db_cnt = 16, + .db_time = debounce_time_mt6765, }; static struct mtk_pin_soc mt7986a_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8127.c b/drivers/pinctrl/mediatek/pinctrl-mt8127.c index 91c530e7b00e..e8772dcfe69e 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8127.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8127.c @@ -286,6 +286,7 @@ static const struct mtk_pinctrl_devdata mt8127_pinctrl_data = { .ports = 6, .ap_num = 143, .db_cnt = 16, + .db_time = debounce_time_mt2701, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8135.c b/drivers/pinctrl/mediatek/pinctrl-mt8135.c index 562846756517..cdb0252071fb 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8135.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8135.c @@ -315,6 +315,7 @@ static const struct mtk_pinctrl_devdata mt8135_pinctrl_data = { .ports = 6, .ap_num = 192, .db_cnt = 16, + .db_time = debounce_time_mt2701, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8167.c b/drivers/pinctrl/mediatek/pinctrl-mt8167.c index 825167f5d020..866da2c4a890 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8167.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8167.c @@ -319,6 +319,7 @@ static const struct mtk_pinctrl_devdata mt8167_pinctrl_data = { .ports = 6, .ap_num = 169, .db_cnt = 64, + .db_time = debounce_time_mt6795, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8173.c b/drivers/pinctrl/mediatek/pinctrl-mt8173.c index 1d7d11a32e7d..37d8cec1c3ce 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8173.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8173.c @@ -327,6 +327,7 @@ static const struct mtk_pinctrl_devdata mt8173_pinctrl_data = { .ports = 6, .ap_num = 224, .db_cnt = 16, + .db_time = debounce_time_mt2701, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8183.c b/drivers/pinctrl/mediatek/pinctrl-mt8183.c index fecb1e64fff4..ddc48b725c22 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8183.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8183.c @@ -545,6 +545,7 @@ static const struct mtk_eint_hw mt8183_eint_hw = { .ports = 6, .ap_num = 212, .db_cnt = 13, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt8183_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8186.c b/drivers/pinctrl/mediatek/pinctrl-mt8186.c index a4dd5197abc1..a02f7c326970 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8186.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8186.c @@ -1222,6 +1222,7 @@ static const struct mtk_eint_hw mt8186_eint_hw = { .ports = 7, .ap_num = 217, .db_cnt = 32, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt8186_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8188.c b/drivers/pinctrl/mediatek/pinctrl-mt8188.c index d0e75c1b4417..6a3d0126288e 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8188.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8188.c @@ -1625,6 +1625,7 @@ static const struct mtk_eint_hw mt8188_eint_hw = { .ports = 7, .ap_num = 225, .db_cnt = 32, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt8188_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8192.c b/drivers/pinctrl/mediatek/pinctrl-mt8192.c index 78c02b7c81f0..9695f4ec6aba 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8192.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8192.c @@ -1371,6 +1371,7 @@ static const struct mtk_eint_hw mt8192_eint_hw = { .ports = 7, .ap_num = 224, .db_cnt = 32, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_reg_calc mt8192_reg_cals[PINCTRL_PIN_REG_MAX] = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8195.c b/drivers/pinctrl/mediatek/pinctrl-mt8195.c index 563693d3d4c2..89557c7ed2ab 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8195.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8195.c @@ -935,6 +935,7 @@ static const struct mtk_eint_hw mt8195_eint_hw = { .ports = 7, .ap_num = 225, .db_cnt = 32, + .db_time = debounce_time_mt6765, }; static const struct mtk_pin_soc mt8195_data = { diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8365.c b/drivers/pinctrl/mediatek/pinctrl-mt8365.c index 57f37a294063..e31b89b226b7 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8365.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8365.c @@ -453,6 +453,7 @@ static const struct mtk_pinctrl_devdata mt8365_pinctrl_data = { .ports = 5, .ap_num = 160, .db_cnt = 160, + .db_time = debounce_time_mt6765, }, }; diff --git a/drivers/pinctrl/mediatek/pinctrl-mt8516.c b/drivers/pinctrl/mediatek/pinctrl-mt8516.c index 939a1932b8dc..e929339dd2cb 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt8516.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt8516.c @@ -319,6 +319,7 @@ static const struct mtk_pinctrl_devdata mt8516_pinctrl_data = { .ports = 6, .ap_num = 169, .db_cnt = 64, + .db_time = debounce_time_mt6795, }, }; From 5a01c805441bdc86e7af206d8a03735cc9394ffb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 12 Nov 2022 15:06:07 -0500 Subject: [PATCH 230/963] NFSD: Fix trace_nfsd_fh_verify_err() crasher Now that the nfsd_fh_verify_err() tracepoint is always called on error, it needs to handle cases where the filehandle is not yet fully formed. Fixes: 93c128e709ae ("nfsd: ensure we always call fh_verify_error tracepoint") Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton --- fs/nfsd/trace.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 06a96e955bd0..d4b6839bb459 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -254,7 +254,10 @@ TRACE_EVENT_CONDITION(nfsd_fh_verify_err, rqstp->rq_xprt->xpt_remotelen); __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); - __entry->inode = d_inode(fhp->fh_dentry); + if (fhp->fh_dentry) + __entry->inode = d_inode(fhp->fh_dentry); + else + __entry->inode = NULL; __entry->type = type; __entry->access = access; __entry->error = be32_to_cpu(error); From a6f4f1662711bd03308371d9649783a5be596898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 10 Nov 2022 18:49:18 +0100 Subject: [PATCH 231/963] HID: uclogic: Fix frame templates for big endian architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parsing a frame template with a placeholder indicating the number of buttons present on the frame its value was incorrectly set on big endian architectures due to double little endian conversion. In order to reproduce the issue and verify the fix, run the HID KUnit tests on the PowerPC architecture: $ ./tools/testing/kunit/kunit.py run --kunitconfig=drivers/hid \ --arch=powerpc --cross_compile=powerpc64-linux-gnu- Fixes: 867c89254425 ("HID: uclogic: Allow to generate frame templates") Signed-off-by: José Expósito Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-rdesc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c index 4bd54c4fb5b0..6b73eb0df6bd 100644 --- a/drivers/hid/hid-uclogic-rdesc.c +++ b/drivers/hid/hid-uclogic-rdesc.c @@ -1193,7 +1193,7 @@ __u8 *uclogic_rdesc_template_apply(const __u8 *template_ptr, p[sizeof(btn_head)] < param_num) { v = param_list[p[sizeof(btn_head)]]; put_unaligned((__u8)0x2A, p); /* Usage Maximum */ - put_unaligned_le16((__force u16)cpu_to_le16(v), p + 1); + put_unaligned((__force u16)cpu_to_le16(v), (s16 *)(p + 1)); p += sizeof(btn_head) + 1; } else { p++; From 37020bbb71d911431e16c2c940b97cf86ae4f2f6 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Mon, 14 Nov 2022 20:19:43 +0800 Subject: [PATCH 232/963] erofs: fix missing xas_retry() in fscache mode The xarray iteration only holds the RCU read lock and thus may encounter XA_RETRY_ENTRY if there's process modifying the xarray concurrently. This will cause oops when referring to the invalid entry. Fix this by adding the missing xas_retry(), which will make the iteration wind back to the root node if XA_RETRY_ENTRY is encountered. Fixes: d435d53228dd ("erofs: change to use asynchronous io for fscache readpage/readahead") Suggested-by: David Howells Reviewed-by: Gao Xiang Reviewed-by: Jia Zhu Signed-off-by: Jingbo Xu Link: https://lore.kernel.org/r/20221114121943.29987-1-jefflexu@linux.alibaba.com Signed-off-by: Gao Xiang --- fs/erofs/fscache.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 6eaf4a4ab95c..af5ed6b9c54d 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -75,11 +75,15 @@ static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq) rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos = - (folio_index(folio) - start_page) * PAGE_SIZE; - unsigned int pgend = pgpos + folio_size(folio); + unsigned int pgpos, pgend; bool pg_failed = false; + if (xas_retry(&xas, folio)) + continue; + + pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; + pgend = pgpos + folio_size(folio); + for (;;) { if (!subreq) { pg_failed = true; From d7dbd43f4a828fa1d9a8614d5b0ac40aee6375fe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Nov 2022 10:19:30 -0800 Subject: [PATCH 233/963] blk-cgroup: properly pin the parent in blkcg_css_online blkcg_css_online is supposed to pin the blkcg of the parent, but 397c9f46ee4d refactored things and along the way, changed it to pin the css instead. This results in extra pins, and we end up leaking blkcgs and cgroups. Fixes: 397c9f46ee4d ("blk-cgroup: move blkcg_{pin,unpin}_online out of line") Signed-off-by: Chris Mason Spotted-by: Rik van Riel Cc: # v5.19+ Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20221114181930.2093706-1-clm@fb.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6a5c849ee061..ed761c62ad0a 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1213,7 +1213,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css) * parent so that offline always happens towards the root. */ if (parent) - blkcg_pin_online(css); + blkcg_pin_online(&parent->css); return 0; } From 5fd2a60aecf3a42b14fa371c55b3dbb18b229230 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 14 Nov 2022 15:52:57 +0100 Subject: [PATCH 234/963] libbpf: Use correct return pointer in attach_raw_tp We need to pass '*link' to final libbpf_get_error, because that one holds the return value, not 'link'. Fixes: 4fa5bcfe07f7 ("libbpf: Allow BPF program auto-attach handlers to bail out") Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221114145257.882322-1-jolsa@kernel.org --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 184ce1684dcd..91b7106a4a73 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11169,7 +11169,7 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } *link = bpf_program__attach_raw_tracepoint(prog, tp_name); - return libbpf_get_error(link); + return libbpf_get_error(*link); } /* Common logic for all BPF program types that attach to a btf_id */ From 79ece9b292af6b0edcfb4d67a00711d25507640b Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 14 Nov 2022 13:20:34 +0100 Subject: [PATCH 235/963] i2c: Restore initial power state if probe fails A driver that supports I2C_DRV_ACPI_WAIVE_D0_PROBE is not expected to power off a device that it has not powered on previously. For devices operating in "full_power" mode, the first call to `i2c_acpi_waive_d0_probe` will return 0, which means that the device will be turned on with `dev_pm_domain_attach`. If probe fails the second call to `i2c_acpi_waive_d0_probe` will return 1, which means that the device will not be turned off. This is, it will be left in a different power state. Lets fix it. Reviewed-by: Hidenori Kobayashi Reviewed-by: Sergey Senozhatsky Reviewed-by: Sakari Ailus Cc: stable@vger.kernel.org Fixes: b18c1ad685d9 ("i2c: Allow an ACPI driver to manage the device's power state during probe") Signed-off-by: Ricardo Ribalda Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index b4edf10e8fd0..7539b0740351 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; + bool do_power_on; int status; if (!client) @@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev) if (status < 0) goto err_clear_wakeup_irq; - status = dev_pm_domain_attach(&client->dev, - !i2c_acpi_waive_d0_probe(dev)); + do_power_on = !i2c_acpi_waive_d0_probe(dev); + status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; @@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev) err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev) devres_release_group(&client->dev, client->devres_group_id); - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); From fdd0d6b2eb35c83d6b1226ad20b346a4b45ddfb8 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Mon, 14 Nov 2022 10:50:56 +0800 Subject: [PATCH 236/963] iio: adc: aspeed: Remove the trim valid dts property. The dts property "aspeed,trim-data-valid" is currently used to determine whether to read trimming data from the OTP register. If this is set on a device without valid trimming data in the OTP the ADC will not function correctly. This patch drops the use of this property and instead uses the default (unprogrammed) OTP value of 0 to detect when a fallback value of 0x8 should be used rather then the value read from the OTP. Fixes: d0a4c17b4073 ("iio: adc: aspeed: Get and set trimming data.") Signed-off-by: Billy Tsai Link: https://lore.kernel.org/r/20221114025057.10843-1-billy_tsai@aspeedtech.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/aspeed_adc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 9341e0e0eb55..998e8bcc06e1 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -202,6 +202,8 @@ static int aspeed_adc_set_trim_data(struct iio_dev *indio_dev) ((scu_otp) & (data->model_data->trim_locate->field)) >> __ffs(data->model_data->trim_locate->field); + if (!trimming_val) + trimming_val = 0x8; } dev_dbg(data->dev, "trimming val = %d, offset = %08x, fields = %08x\n", @@ -563,12 +565,9 @@ static int aspeed_adc_probe(struct platform_device *pdev) if (ret) return ret; - if (of_find_property(data->dev->of_node, "aspeed,trim-data-valid", - NULL)) { - ret = aspeed_adc_set_trim_data(indio_dev); - if (ret) - return ret; - } + ret = aspeed_adc_set_trim_data(indio_dev); + if (ret) + return ret; if (of_find_property(data->dev->of_node, "aspeed,battery-sensing", NULL)) { From 398e3479874f381cca8726ca5d8a31e1bf35a3cd Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Mon, 14 Nov 2022 10:50:57 +0800 Subject: [PATCH 237/963] dt-bindings: iio: adc: Remove the property "aspeed,trim-data-valid" If the property is set on a device without valid trimming data in the OTP the ADC will not function correctly. Therefore, this patch drops the use of this property to avoid this scenario. Fixes: 2bdb2f00a895 ("dt-bindings: iio: adc: Add ast2600-adc bindings") Signed-off-by: Billy Tsai Acked-by: Rob Herring Link: https://lore.kernel.org/r/20221114025057.10843-2-billy_tsai@aspeedtech.com Cc: Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml index b283c8ca2bbf..5c08d8b6e995 100644 --- a/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/aspeed,ast2600-adc.yaml @@ -62,13 +62,6 @@ properties: description: Inform the driver that last channel will be used to sensor battery. - aspeed,trim-data-valid: - type: boolean - description: | - The ADC reference voltage can be calibrated to obtain the trimming - data which will be stored in otp. This property informs the driver that - the data store in the otp is valid. - required: - compatible - reg From 9ad6645a9dce4d0e42daca6ebf32a154401c59d3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 Nov 2022 16:13:50 +0100 Subject: [PATCH 238/963] HID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch V 10 The Acer Aspire Switch V 10 (SW5-017)'s keyboard-dock uses the same ITE controller setup as other Acer Switch 2-in-1's. This needs special handling for the wifi on/off toggle hotkey as well as to properly report touchpad on/off keypresses. Add the USB-ids for the SW5-017's keyboard-dock with a quirk setting of QUIRK_TOUCHPAD_ON_OFF_REPORT to fix both issues. Cc: Rudolf Polzer Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-ite.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index dad953f66996..b3dfe8d9e556 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1215,6 +1215,7 @@ #define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 430fa4f52ed3..75ebfcf31889 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); From d180b6496143cd360c5d5f58ae4b9a8229c1f344 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Fri, 11 Nov 2022 15:55:11 +0300 Subject: [PATCH 239/963] HID: hid-lg4ff: Add check for empty lbuf If an empty buf is received, lbuf is also empty. So lbuf is accessed by index -1. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: f31a2de3fe36 ("HID: hid-lg4ff: Allow switching of Logitech gaming wheels between compatibility modes") Signed-off-by: Anastasia Belova Signed-off-by: Jiri Kosina --- drivers/hid/hid-lg4ff.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 5e6a0cef2a06..e3fcf1353fb3 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att return -ENOMEM; i = strlen(lbuf); + + if (i == 0) { + kfree(lbuf); + return -EINVAL; + } + if (lbuf[i-1] == '\n') { if (i == 1) { kfree(lbuf); From 30f5312d2c722107364f266cfa98ef4f0857c1fb Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Fri, 11 Nov 2022 18:03:27 +0100 Subject: [PATCH 240/963] mlxsw: Avoid warnings when not offloaded FDB entry with IPv6 is removed FDB entries that perform VXLAN encapsulation with an IPv6 underlay hold a reference on a resource - the KVDL entry where the IPv6 underlay destination IP is stored. For that, the driver maintains two hash tables: 1. Maps IPv6 to KVDL index 2. Maps {MAC, FID index} to IPv6 address When a FDB entry is removed, the second table is used to find the relevant IPv6 address and the first table is used to remove the reference count and free the index if is not used anymore. In order for a packet to be forwarded to a single remote VTEP, FDB entries need to be configured at both the bridge and VXLAN devices' FDB tables. Both entries are squashed into one {MAC, VLAN/VNI} -> IP entry in the hardware. Therefore, in case one entry is removed, the entry will be removed from the hardware and the remaining entry will be unmarked with 'offload' flag since it is not offloaded anymore. For example, the two FDB entries should be added to allow packets to be forwarded via vx10: $ bridge fdb add dev vx10 aa:bb:cc:dd:ee:ff self static dst 2001:db8:5::1 $ bridge fdb add dev vx10 aa:bb:cc:dd:ee:ff master static vlan 10 When one entry will be removed, the second one will not be offloaded anymore. When the first entry (in VXLAN FDB) will be removed / will not be offloaded anymore, the two mappings in IPv6 hash tables will be removed. In case that the second entry is removed before the first one, unexpected warnings[1][2] will be shown in user space as a result of removing the first entry. The issue is that not offloaded entry is removed, the driver tries to search the relevant entries in the hash tables, does not find them and therefore warns. Do not handle removing of not offloaded VXLAN FDB entries, as they were already removed when the offload flag was removed. [1]: WARNING: CPU: 1 PID: 239 at drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c:914 mlxsw_sp_nve_ipv6_addr_map_del+0x6b/0x80 [mlxsw_spectrum] ... Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 Workqueue: mlxsw_core_ordered mlxsw_sp_switchdev_vxlan_fdb_event_work [mlxsw_spectrum] RIP: 0010:mlxsw_sp_nve_ipv6_addr_map_del+0x6b/0x80 [mlxsw_spectrum] ... Call Trace: mlxsw_sp_port_fdb_tunnel_uc_op+0x6cf/0x7b0 [mlxsw_spectrum] mlxsw_sp_switchdev_vxlan_fdb_event_work+0x17c/0x420 [mlxsw_spectrum] ? finish_task_switch.isra.0+0x8c/0x290 process_one_work+0x1cd/0x390 worker_thread+0x48/0x3c0 ? process_one_work+0x390/0x390 kthread+0xe0/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 [2]: WARNING: CPU: 0 PID: 239 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:3035 mlxsw_sp_ipv6_addr_put+0x142/0x220 [mlxsw_spectrum] ... Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 Workqueue: mlxsw_core_ordered mlxsw_sp_switchdev_vxlan_fdb_event_work [mlxsw_spectrum] RIP: 0010:mlxsw_sp_ipv6_addr_put+0x142/0x220 [mlxsw_spectrum] ... Call Trace: ? mlxsw_sp_port_fdb_tun_uc_op6_sfd_write+0x5c1/0x610 [mlxsw_spectrum] mlxsw_sp_port_fdb_tunnel_uc_op+0x6ec/0x7b0 [mlxsw_spectrum] mlxsw_sp_switchdev_vxlan_fdb_event_work+0x17c/0x420 [mlxsw_spectrum] ? finish_task_switch.isra.0+0x8c/0x290 process_one_work+0x1cd/0x390 worker_thread+0x48/0x3c0 ? process_one_work+0x390/0x390 kthread+0xe0/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Fixes: 0860c7641634 ("mlxsw: spectrum_nve: Keep track of IPv6 addresses used by FDB entries") Signed-off-by: Amit Cohen Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/c186de8cbd28e3eb661e06f31f7f2f2dff30020f.1668184350.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 4efccd942fb8..1290b2d3eae6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -3470,6 +3470,8 @@ mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, u16 vid; vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + if (!vxlan_fdb_info->offloaded) + return; bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); if (!bridge_device) From 280c0f7cd0aa4d190619b18243110e052a90775c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sun, 13 Nov 2022 09:29:29 +0000 Subject: [PATCH 241/963] net: ionic: Fix error handling in ionic_init_module() A problem about ionic create debugfs failed is triggered with the following log given: [ 415.799514] debugfs: Directory 'ionic' with parent '/' already present! The reason is that ionic_init_module() returns ionic_bus_register_driver() directly without checking its return value, if ionic_bus_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ionic can never be created later. ionic_init_module() ionic_debugfs_create() # create debugfs directory ionic_bus_register_driver() pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when ionic_bus_register_driver() returns error. Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") Signed-off-by: Yuan Can Acked-by: Shannon Nelson Link: https://lore.kernel.org/r/20221113092929.19161-1-yuancan@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 56f93b030551..5456c2b15d9b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -687,8 +687,14 @@ int ionic_port_reset(struct ionic *ionic) static int __init ionic_init_module(void) { + int ret; + ionic_debugfs_create(); - return ionic_bus_register_driver(); + ret = ionic_bus_register_driver(); + if (ret) + ionic_debugfs_destroy(); + + return ret; } static void __exit ionic_cleanup_module(void) From fc2292bd01a25279bf3e29c99e60bfe52d900a91 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Thu, 13 Oct 2022 14:46:40 -0700 Subject: [PATCH 242/963] MAINTAINERS: git://github.com -> https://github.com for HiSilicon Github deprecated the git:// links about a year ago, so let's move to the https:// URLs instead. Reported-by: Conor Dooley Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/ Signed-off-by: Palmer Dabbelt Signed-off-by: Wei Xu --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf0f18502372..a119500e1946 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2197,7 +2197,7 @@ M: Wei Xu L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported W: http://www.hisilicon.com -T: git git://github.com/hisilicon/linux-hisi.git +T: git https://github.com/hisilicon/linux-hisi.git F: arch/arm/boot/dts/hi3* F: arch/arm/boot/dts/hip* F: arch/arm/boot/dts/hisi* From d1463bf5661f899f515e71f2739ae067b2883f36 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Mon, 14 Nov 2022 22:33:29 +0000 Subject: [PATCH 243/963] ANDROID: GKI: Convert TIPC to gki modules Converts following networking features to modules: CONFIG_TIPC: The Transparent Inter Process Communication (TIPC) protocol. CONFIG_TIPC_DIAG: TIPC: socket monitoring interface Bug: 232431151 Test: TH Change-Id: I370cc7a34f67222b619107d773cc4a13edd6b538 Signed-off-by: Ramji Jiyani (cherry picked from commit e516a098e2a4605ce113dc85ba6ac4c93d5136fc) --- BUILD.bazel | 2 ++ android/gki_system_dlkm_modules | 2 ++ arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index a0ad8f3ada40..809c555d5051 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -34,6 +34,8 @@ _common_gki_modules_list = [ "net/can/can-gw.ko", "net/can/can-raw.ko", "net/mac80211/mac80211.ko", + "net/tipc/diag.ko", + "net/tipc/tipc.ko", "net/wireless/cfg80211.ko", ] diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index f9b39a9a998b..2a9ac5ca246c 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -10,5 +10,7 @@ net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko net/mac80211/mac80211.ko +net/tipc/diag.ko +net/tipc/tipc.ko net/wireless/cfg80211.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 4432cff24aa2..d491f00a1342 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -212,7 +212,7 @@ CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y -CONFIG_TIPC=y +CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 663ab21c2e0f..8880f3456be7 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -202,7 +202,7 @@ CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y -CONFIG_TIPC=y +CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y From dd503c2c080fc503e58e589ffdfce9075dfa25b3 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Mon, 14 Nov 2022 23:15:13 +0000 Subject: [PATCH 244/963] ANDROID: GKI: Convert VLAN 8021Q as gki module Sets following feature to module: CONFIG_VLAN_8021Q: Create 8021Q vlan interfaces. Bug: 232431151 Test: TH Change-Id: I4728f03b04bb49f65838564c96f88d232cbf21e8 Signed-off-by: Ramji Jiyani (cherry picked from commit 2cf15a5bbc4a8d7e8f33a31e1647ce1095150aaa) --- BUILD.bazel | 1 + android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 809c555d5051..932001e19545 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -30,6 +30,7 @@ _common_gki_modules_list = [ "drivers/usb/serial/ftdi_sio.ko", "drivers/usb/serial/usbserial.ko", "mm/zsmalloc.ko", + "net/8021q/8021q.ko", "net/can/can-bcm.ko", "net/can/can-gw.ko", "net/can/can-raw.ko", diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 2a9ac5ca246c..f6518233d9ff 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -6,6 +6,7 @@ drivers/usb/class/cdc-acm.ko drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko mm/zsmalloc.ko +net/8021q/8021q.ko net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index d491f00a1342..f74d71ee0f22 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -215,7 +215,7 @@ CONFIG_IP6_NF_RAW=y CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y +CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=y CONFIG_IEEE802154=y CONFIG_IEEE802154_6LOWPAN=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 8880f3456be7..481110e55540 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -205,7 +205,7 @@ CONFIG_IP6_NF_RAW=y CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y -CONFIG_VLAN_8021Q=y +CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=y CONFIG_IEEE802154=y CONFIG_IEEE802154_6LOWPAN=y From 4abb77fc5531381484477cac95913336c97176b7 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Nov 2022 14:29:37 +0100 Subject: [PATCH 245/963] xen/platform-pci: use define instead of literal number Instead of "0x01" use the HVM_PARAM_CALLBACK_TYPE_PCI_INTX define from the interface header in get_callback_via(). Signed-off-by: Juergen Gross Acked-by: Stefano Stabellini Signed-off-by: Juergen Gross --- drivers/xen/platform-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 6ebd819338ec..cd07e3fed0fa 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -54,7 +54,8 @@ static uint64_t get_callback_via(struct pci_dev *pdev) pin = pdev->pin; /* We don't know the GSI. Specify the PCI INTx line instead. */ - return ((uint64_t)0x01 << HVM_CALLBACK_VIA_TYPE_SHIFT) | /* PCI INTx identifier */ + return ((uint64_t)HVM_PARAM_CALLBACK_TYPE_PCI_INTX << + HVM_CALLBACK_VIA_TYPE_SHIFT) | ((uint64_t)pci_domain_nr(pdev->bus) << 32) | ((uint64_t)pdev->bus->number << 16) | ((uint64_t)(pdev->devfn & 0xff) << 8) | From 2e35b25dd8e666b8619355fc3defb1b246a5dc02 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 15 Nov 2022 09:11:07 +0100 Subject: [PATCH 246/963] pinctrl: mediatek: Export debounce time tables The kernel test robot complains that in certain combinations when building the Mediatek drivers as modules we lack some debounce table symbols, so export them. Reported-by: kernel test robot Fixes: e1ff91f9d230 ("pinctrl: mediatek: Fix EINT pins input debounce time configuration") Cc: AngeloGioacchino Del Regno Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index e94ee802c603..65d312967619 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -52,14 +52,17 @@ static const struct mtk_eint_regs mtk_generic_eint_regs = { const unsigned int debounce_time_mt2701[] = { 500, 1000, 16000, 32000, 64000, 128000, 256000, 0 }; +EXPORT_SYMBOL_GPL(debounce_time_mt2701); const unsigned int debounce_time_mt6765[] = { 125, 250, 500, 1000, 16000, 32000, 64000, 128000, 256000, 512000, 0 }; +EXPORT_SYMBOL_GPL(debounce_time_mt6765); const unsigned int debounce_time_mt6795[] = { 500, 1000, 16000, 32000, 64000, 128000, 256000, 512000, 0 }; +EXPORT_SYMBOL_GPL(debounce_time_mt6795); static void __iomem *mtk_eint_get_offset(struct mtk_eint *eint, unsigned int eint_num, From 2bc5febd05abe86c3e3d4b4f18dff4bc4316c1be Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 8 Nov 2022 22:37:18 +0100 Subject: [PATCH 247/963] clk: samsung: Revert "clk: samsung: exynos-clkout: Use of_device_get_match_data()" of_device_get_match_data() function should not be used on the device other than the one matched to the given driver, because it always returns the match_data of the matched driver. In case of exynos-clkout driver, the code matched the OF IDs on the PARENT device, so replacing it with of_device_get_match_data() broke the driver. This reverts commit 777aaf3d1daf793461269b49c063aca1cee06a44. Signed-off-by: Marek Szyprowski Fixes: 777aaf3d1daf ("clk: samsung: exynos-clkout: Use of_device_get_match_data()") Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221108213718.32076-1-m.szyprowski@samsung.com Signed-off-by: Krzysztof Kozlowski --- drivers/clk/samsung/clk-exynos-clkout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; From d5ceb4d1c50786d21de3d4b06c3f43109ec56dd8 Mon Sep 17 00:00:00 2001 From: Bean Huo Date: Mon, 14 Nov 2022 14:48:52 +0100 Subject: [PATCH 248/963] nvme-pci: add NVME_QUIRK_BOGUS_NID for Micron Nitro Added a quirk to fix Micron Nitro NVMe reporting duplicate NGUIDs. Cc: Signed-off-by: Bean Huo Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 02b5578773a1..bf708fd5adf6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3489,6 +3489,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN }, + { PCI_DEVICE(0x1344, 0x6001), /* Micron Nitro NVMe */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */ From 148f4b32b4504d8a32cf82049b7b9499a4b299ab Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Wed, 9 Nov 2022 22:24:15 +0100 Subject: [PATCH 249/963] USB: serial: option: add Fibocom FM160 0x0111 composition Add support for the following Fibocom FM160 composition: 0x0111: MBIM + MODEM + DIAG + AT T: Bus=01 Lev=02 Prnt=125 Port=01 Cnt=02 Dev#= 93 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2cb7 ProdID=0111 Rev= 5.04 S: Manufacturer=Fibocom S: Product=Fibocom FM160 Modem_SN:12345678 S: SerialNumber=12345678 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 37257a52287d..2a0d8fc9af91 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2170,6 +2170,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x010a, 0xff) }, /* Fibocom MA510 (ECM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0111, 0xff) }, /* Fibocom FM160 (MBIM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ From da74858a475782a3f16470907814c8cc5950ad68 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Sun, 30 Oct 2022 21:56:29 +0100 Subject: [PATCH 250/963] ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188 The clock source and the sched_clock provided by the arm_global_timer on Rockchip rk3066a/rk3188 are quite unstable because their rates depend on the CPU frequency. Recent changes to the arm_global_timer driver makes it impossible to use. On the other side, the arm_global_timer has a higher rating than the ROCKCHIP_TIMER, it will be selected by default by the time framework while we want to use the stable Rockchip clock source. Keep the arm_global_timer disabled in order to have the DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/f275ca8d-fd0a-26e5-b978-b7f3df815e0a@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3188.dtsi | 1 - arch/arm/boot/dts/rk3xxx.dtsi | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index cdd4a0bd5133..486a96ce2a09 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -607,7 +607,6 @@ &emac { &global_timer { interrupts = ; - status = "disabled"; }; &local_timer { diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index bf285091a9eb..cb4e42ede56a 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -76,6 +76,13 @@ global_timer: global-timer@1013c200 { reg = <0x1013c200 0x20>; interrupts = ; clocks = <&cru CORE_PERI>; + status = "disabled"; + /* The clock source and the sched_clock provided by the arm_global_timer + * on Rockchip rk3066a/rk3188 are quite unstable because their rates + * depend on the CPU frequency. + * Keep the arm_global_timer disabled in order to have the + * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. + */ }; local_timer: local-timer@1013c600 { From a5b5fb0fc47ddc7d1ed6a0365197639a01bc1f3a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 7 Nov 2022 20:33:23 -0600 Subject: [PATCH 251/963] platform/x86/amd: pmc: Remove more CONFIG_DEBUG_FS checks commit b37fe34c8309 ("platform/x86/amd: pmc: remove CONFIG_DEBUG_FS checks") removed most CONFIG_DEBUG_FS checks, but there were some left that were reported to cause compile test failures. Remove the remaining checks, and also the unnecessary CONFIG_SUSPEND used in the same place. Reported-by: liyupeng@zbhlos.com Fixes: b37fe34c8309 ("platform/x86/amd: pmc: remove CONFIG_DEBUG_FS checks") Signed-off-by: Mario Limonciello Link: https://bugzilla.kernel.org/show_bug.cgi?id=216679 Link: https://lore.kernel.org/r/20221108023323.19304-1-mario.limonciello@amd.com Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 96e790e639a2..3b44f2fff5be 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -276,7 +276,6 @@ static const struct file_operations amd_pmc_stb_debugfs_fops_v2 = { .release = amd_pmc_stb_debugfs_release_v2, }; -#if defined(CONFIG_SUSPEND) || defined(CONFIG_DEBUG_FS) static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) { if (dev->cpu_id == AMD_CPU_ID_PCO) { @@ -351,7 +350,6 @@ static int get_metrics_table(struct amd_pmc_dev *pdev, struct smu_metrics *table memcpy_fromio(table, pdev->smu_virt_addr, sizeof(struct smu_metrics)); return 0; } -#endif /* CONFIG_SUSPEND || CONFIG_DEBUG_FS */ #ifdef CONFIG_SUSPEND static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev) From 5121197ecc5db58c07da95eb1ff82b98b121a221 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sun, 13 Nov 2022 16:51:19 -0800 Subject: [PATCH 252/963] kcm: close race conditions on sk_receive_queue sk->sk_receive_queue is protected by skb queue lock, but for KCM sockets its RX path takes mux->rx_lock to protect more than just skb queue. However, kcm_recvmsg() still only grabs the skb queue lock, so race conditions still exist. We can teach kcm_recvmsg() to grab mux->rx_lock too but this would introduce a potential performance regression as struct kcm_mux can be shared by multiple KCM sockets. So we have to enforce skb queue lock in requeue_rx_msgs() and handle skb peek case carefully in kcm_wait_data(). Fortunately, skb_recv_datagram() already handles it nicely and is widely used by other sockets, we can just switch to skb_recv_datagram() after getting rid of the unnecessary sock lock in kcm_recvmsg() and kcm_splice_read(). Side note: SOCK_DONE is not used by KCM sockets, so it is safe to get rid of this check too. I ran the original syzbot reproducer for 30 min without seeing any issue. Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+278279efdd2730dd14bf@syzkaller.appspotmail.com Reported-by: shaozhengchao Cc: Paolo Abeni Cc: Tom Herbert Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20221114005119.597905-1-xiyou.wangcong@gmail.com Signed-off-by: Paolo Abeni --- net/kcm/kcmsock.c | 58 +++++------------------------------------------ 1 file changed, 6 insertions(+), 52 deletions(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a5004228111d..890a2423f559 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -222,7 +222,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head) struct sk_buff *skb; struct kcm_sock *kcm; - while ((skb = __skb_dequeue(head))) { + while ((skb = skb_dequeue(head))) { /* Reset destructor to avoid calling kcm_rcv_ready */ skb->destructor = sock_rfree; skb_orphan(skb); @@ -1085,53 +1085,17 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) return err; } -static struct sk_buff *kcm_wait_data(struct sock *sk, int flags, - long timeo, int *err) -{ - struct sk_buff *skb; - - while (!(skb = skb_peek(&sk->sk_receive_queue))) { - if (sk->sk_err) { - *err = sock_error(sk); - return NULL; - } - - if (sock_flag(sk, SOCK_DONE)) - return NULL; - - if ((flags & MSG_DONTWAIT) || !timeo) { - *err = -EAGAIN; - return NULL; - } - - sk_wait_data(sk, &timeo, NULL); - - /* Handle signals */ - if (signal_pending(current)) { - *err = sock_intr_errno(timeo); - return NULL; - } - } - - return skb; -} - static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); int err = 0; - long timeo; struct strp_msg *stm; int copied = 0; struct sk_buff *skb; - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; @@ -1162,14 +1126,11 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Finished with message */ msg->msg_flags |= MSG_EOR; KCM_STATS_INCR(kcm->stats.rx_msgs); - skb_unlink(skb, &sk->sk_receive_queue); - kfree_skb(skb); } } out: - release_sock(sk); - + skb_free_datagram(sk, skb); return copied ? : err; } @@ -1179,7 +1140,6 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - long timeo; struct strp_msg *stm; int err = 0; ssize_t copied; @@ -1187,11 +1147,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Only support splice for SOCKSEQPACKET */ - timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); - - lock_sock(sk); - - skb = kcm_wait_data(sk, flags, timeo, &err); + skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto err_out; @@ -1219,13 +1175,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, * finish reading the message. */ - release_sock(sk); - + skb_free_datagram(sk, skb); return copied; err_out: - release_sock(sk); - + skb_free_datagram(sk, skb); return err; } From d349e9be5a2c2d7588a2c4e4bfa0bb3dc1226769 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 02:56:59 +0000 Subject: [PATCH 253/963] net: ena: Fix error handling in ena_init() The ena_init() won't destroy workqueue created by create_singlethread_workqueue() when pci_register_driver() failed. Call destroy_workqueue() when pci_register_driver() failed to prevent the resource leak. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Yuan Can Acked-by: Shay Agroskin Link: https://lore.kernel.org/r/20221114025659.124726-1-yuancan@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index d350eeec8bad..5a454b58498f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4543,13 +4543,19 @@ static struct pci_driver ena_pci_driver = { static int __init ena_init(void) { + int ret; + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); if (!ena_wq) { pr_err("Failed to create workqueue\n"); return -ENOMEM; } - return pci_register_driver(&ena_pci_driver); + ret = pci_register_driver(&ena_pci_driver); + if (ret) + destroy_workqueue(ena_wq); + + return ret; } static void __exit ena_cleanup(void) From 18c532e44939caa17f1fa380f7ac50dbc0718dbb Mon Sep 17 00:00:00 2001 From: Aminuddin Jamaluddin Date: Mon, 14 Nov 2022 14:53:02 +0800 Subject: [PATCH 254/963] net: phy: marvell: add sleep time after enabling the loopback bit Sleep time is added to ensure the phy to be ready after loopback bit was set. This to prevent the phy loopback test from failing. Fixes: 020a45aff119 ("net: phy: marvell: add Marvell specific PHY loopback") Cc: # 5.15.x Signed-off-by: Muhammad Husaini Zulkifli Signed-off-by: Aminuddin Jamaluddin Link: https://lore.kernel.org/r/20221114065302.10625-1-aminuddin.jamaluddin@intel.com Signed-off-by: Paolo Abeni --- drivers/net/phy/marvell.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 2810f4f9da0c..0d706ee266af 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -2015,14 +2015,16 @@ static int m88e1510_loopback(struct phy_device *phydev, bool enable) if (err < 0) return err; - /* FIXME: Based on trial and error test, it seem 1G need to have - * delay between soft reset and loopback enablement. - */ - if (phydev->speed == SPEED_1000) - msleep(1000); + err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, + BMCR_LOOPBACK); - return phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, - BMCR_LOOPBACK); + if (!err) { + /* It takes some time for PHY device to switch + * into/out-of loopback mode. + */ + msleep(1000); + } + return err; } else { err = phy_modify(phydev, MII_BMCR, BMCR_LOOPBACK, 0); if (err < 0) From a56cad694767ebdb7d80f27ffc239db46fff64de Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Mon, 14 Nov 2022 16:20:46 +0800 Subject: [PATCH 255/963] net: hns3: fix incorrect hw rss hash type of rx packet Currently, the HNS3 driver reports the rss hash type of each packet based on the rss hash tuples set. It always reports PKT_HASH_TYPE_L4, without checking the type of current packet. It's incorrect. Fixes it by reporting it base on the packet type. Fixes: 796640778c26 ("net: hns3: support RXD advanced layout") Fixes: 232fc64b6e62 ("net: hns3: Add HW RSS hash information to RX skb") Fixes: ea4858670717 ("net: hns3: handle the BD info on the last BD of the packet") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - .../hns3/hns3_common/hclge_comm_rss.c | 20 --- .../hns3/hns3_common/hclge_comm_rss.h | 2 - .../net/ethernet/hisilicon/hns3/hns3_enet.c | 163 ++++++++++-------- .../net/ethernet/hisilicon/hns3/hns3_enet.h | 1 + .../hisilicon/hns3/hns3pf/hclge_main.c | 1 - 6 files changed, 94 insertions(+), 94 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 0179fc288f5f..17137de9338c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -819,7 +819,6 @@ struct hnae3_knic_private_info { const struct hnae3_dcb_ops *dcb_ops; u16 int_rl_setting; - enum pkt_hash_types rss_type; void __iomem *io_base; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c index e23729ac3bb8..ae2736549526 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c @@ -191,23 +191,6 @@ u32 hclge_comm_get_rss_key_size(struct hnae3_handle *handle) return HCLGE_COMM_RSS_KEY_SIZE; } -void hclge_comm_get_rss_type(struct hnae3_handle *nic, - struct hclge_comm_rss_tuple_cfg *rss_tuple_sets) -{ - if (rss_tuple_sets->ipv4_tcp_en || - rss_tuple_sets->ipv4_udp_en || - rss_tuple_sets->ipv4_sctp_en || - rss_tuple_sets->ipv6_tcp_en || - rss_tuple_sets->ipv6_udp_en || - rss_tuple_sets->ipv6_sctp_en) - nic->kinfo.rss_type = PKT_HASH_TYPE_L4; - else if (rss_tuple_sets->ipv4_fragment_en || - rss_tuple_sets->ipv6_fragment_en) - nic->kinfo.rss_type = PKT_HASH_TYPE_L3; - else - nic->kinfo.rss_type = PKT_HASH_TYPE_NONE; -} - int hclge_comm_parse_rss_hfunc(struct hclge_comm_rss_cfg *rss_cfg, const u8 hfunc, u8 *hash_algo) { @@ -344,9 +327,6 @@ int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, req->ipv6_sctp_en = rss_cfg->rss_tuple_sets.ipv6_sctp_en; req->ipv6_fragment_en = rss_cfg->rss_tuple_sets.ipv6_fragment_en; - if (is_pf) - hclge_comm_get_rss_type(nic, &rss_cfg->rss_tuple_sets); - ret = hclge_comm_cmd_send(hw, &desc, 1); if (ret) dev_err(&hw->cmq.csq.pdev->dev, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h index 946d166a452d..92af3d2980d3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h @@ -95,8 +95,6 @@ struct hclge_comm_rss_tc_mode_cmd { }; u32 hclge_comm_get_rss_key_size(struct hnae3_handle *handle); -void hclge_comm_get_rss_type(struct hnae3_handle *nic, - struct hclge_comm_rss_tuple_cfg *rss_tuple_sets); void hclge_comm_rss_indir_init_cfg(struct hnae3_ae_dev *ae_dev, struct hclge_comm_rss_cfg *rss_cfg); int hclge_comm_get_rss_tuple(struct hclge_comm_rss_cfg *rss_cfg, int flow_type, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 4cb2421e71a7..7fc83409f257 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -105,26 +105,28 @@ static const struct pci_device_id hns3_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, hns3_pci_tbl); -#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t) \ +#define HNS3_RX_PTYPE_ENTRY(ptype, l, s, t, h) \ { ptype, \ l, \ CHECKSUM_##s, \ HNS3_L3_TYPE_##t, \ - 1 } + 1, \ + h} #define HNS3_RX_PTYPE_UNUSED_ENTRY(ptype) \ - { ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0 } + { ptype, 0, CHECKSUM_NONE, HNS3_L3_TYPE_PARSE_FAIL, 0, \ + PKT_HASH_TYPE_NONE } static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(0), - HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP), - HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP), - HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP), - HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM), - HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL), + HNS3_RX_PTYPE_ENTRY(1, 0, COMPLETE, ARP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(2, 0, COMPLETE, RARP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(3, 0, COMPLETE, LLDP, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(4, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(5, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(6, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(7, 0, COMPLETE, CNM, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(8, 0, NONE, PARSE_FAIL, PKT_HASH_TYPE_NONE), HNS3_RX_PTYPE_UNUSED_ENTRY(9), HNS3_RX_PTYPE_UNUSED_ENTRY(10), HNS3_RX_PTYPE_UNUSED_ENTRY(11), @@ -132,36 +134,36 @@ static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(13), HNS3_RX_PTYPE_UNUSED_ENTRY(14), HNS3_RX_PTYPE_UNUSED_ENTRY(15), - HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4), - HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4), + HNS3_RX_PTYPE_ENTRY(16, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(17, 0, COMPLETE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(18, 0, COMPLETE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(19, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(20, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(21, 0, NONE, IPV4, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(22, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(23, 0, NONE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(24, 0, NONE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(25, 0, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), HNS3_RX_PTYPE_UNUSED_ENTRY(26), HNS3_RX_PTYPE_UNUSED_ENTRY(27), HNS3_RX_PTYPE_UNUSED_ENTRY(28), - HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(29, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(30, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(31, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(32, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(33, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(34, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(35, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(36, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(37, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(38), - HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(39, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(40, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(41, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(42, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(43, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(44, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(45, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(46), HNS3_RX_PTYPE_UNUSED_ENTRY(47), HNS3_RX_PTYPE_UNUSED_ENTRY(48), @@ -227,35 +229,35 @@ static const struct hns3_rx_ptype hns3_rx_ptype_tbl[] = { HNS3_RX_PTYPE_UNUSED_ENTRY(108), HNS3_RX_PTYPE_UNUSED_ENTRY(109), HNS3_RX_PTYPE_UNUSED_ENTRY(110), - HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6), - HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6), + HNS3_RX_PTYPE_ENTRY(111, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(112, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(113, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(114, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(115, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(116, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(117, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(118, 0, NONE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(119, 0, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), HNS3_RX_PTYPE_UNUSED_ENTRY(120), HNS3_RX_PTYPE_UNUSED_ENTRY(121), HNS3_RX_PTYPE_UNUSED_ENTRY(122), - HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL), - HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4), - HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4), - HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4), + HNS3_RX_PTYPE_ENTRY(123, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(124, 0, COMPLETE, PARSE_FAIL, PKT_HASH_TYPE_NONE), + HNS3_RX_PTYPE_ENTRY(125, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(126, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(127, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(128, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(129, 1, UNNECESSARY, IPV4, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(130, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(131, 0, COMPLETE, IPV4, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(132), - HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6), - HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6), - HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6), + HNS3_RX_PTYPE_ENTRY(133, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(134, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(135, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(136, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(137, 1, UNNECESSARY, IPV6, PKT_HASH_TYPE_L4), + HNS3_RX_PTYPE_ENTRY(138, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), + HNS3_RX_PTYPE_ENTRY(139, 0, COMPLETE, IPV6, PKT_HASH_TYPE_L3), HNS3_RX_PTYPE_UNUSED_ENTRY(140), HNS3_RX_PTYPE_UNUSED_ENTRY(141), HNS3_RX_PTYPE_UNUSED_ENTRY(142), @@ -4171,15 +4173,35 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, } static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring, - struct sk_buff *skb, u32 rss_hash) + struct sk_buff *skb, u32 rss_hash, + u32 l234info, u32 ol_info) { - struct hnae3_handle *handle = ring->tqp->handle; - enum pkt_hash_types rss_type; + enum pkt_hash_types rss_type = PKT_HASH_TYPE_NONE; + struct net_device *netdev = ring_to_netdev(ring); + struct hns3_nic_priv *priv = netdev_priv(netdev); - if (rss_hash) - rss_type = handle->kinfo.rss_type; - else - rss_type = PKT_HASH_TYPE_NONE; + if (test_bit(HNS3_NIC_STATE_RXD_ADV_LAYOUT_ENABLE, &priv->state)) { + u32 ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, + HNS3_RXD_PTYPE_S); + + rss_type = hns3_rx_ptype_tbl[ptype].hash_type; + } else { + int l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, + HNS3_RXD_L3ID_S); + int l4_type = hnae3_get_field(l234info, HNS3_RXD_L4ID_M, + HNS3_RXD_L4ID_S); + + if (l3_type == HNS3_L3_TYPE_IPV4 || + l3_type == HNS3_L3_TYPE_IPV6) { + if (l4_type == HNS3_L4_TYPE_UDP || + l4_type == HNS3_L4_TYPE_TCP || + l4_type == HNS3_L4_TYPE_SCTP) + rss_type = PKT_HASH_TYPE_L4; + else if (l4_type == HNS3_L4_TYPE_IGMP || + l4_type == HNS3_L4_TYPE_ICMP) + rss_type = PKT_HASH_TYPE_L3; + } + } skb_set_hash(skb, rss_hash, rss_type); } @@ -4282,7 +4304,8 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) ring->tqp_vector->rx_group.total_bytes += len; - hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash)); + hns3_set_rx_skb_rss_type(ring, skb, le32_to_cpu(desc->rx.rss_hash), + l234info, ol_info); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 133a054af6b7..294a14b4fdef 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -404,6 +404,7 @@ struct hns3_rx_ptype { u32 ip_summed : 2; u32 l3_type : 4; u32 valid : 1; + u32 hash_type: 3; }; struct ring_stats { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 987271da6e9b..e34ac9c5900e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4859,7 +4859,6 @@ static int hclge_set_rss_tuple(struct hnae3_handle *handle, return ret; } - hclge_comm_get_rss_type(&vport->nic, &hdev->rss_cfg.rss_tuple_sets); return 0; } From 29df7c695ed67a8fa32bb7805bad8fe2a76c1f88 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Mon, 14 Nov 2022 16:20:47 +0800 Subject: [PATCH 256/963] net: hns3: fix return value check bug of rx copybreak The refactoring of rx copybreak modifies the original return logic, which will make this feature unavailable. So this patch fixes the return logic of rx copybreak. Fixes: e74a726da2c4 ("net: hns3: refactor hns3_nic_reuse_page()") Fixes: 99f6b5fb5f63 ("net: hns3: use bounce buffer when rx page can not be reused") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 7fc83409f257..028577943ec5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3778,8 +3778,8 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, desc_cb->reuse_flag = 1; } else if (frag_size <= ring->rx_copybreak) { ret = hns3_handle_rx_copybreak(skb, i, ring, pull_len, desc_cb); - if (ret) - goto out; + if (!ret) + return; } out: From 510d7b6ae842e59ee00d57e5f07ac15131b6d899 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Mon, 14 Nov 2022 16:20:48 +0800 Subject: [PATCH 257/963] net: hns3: fix setting incorrect phy link ksettings for firmware in resetting process Currently, if driver is in phy-imp(phy controlled by imp firmware) mode, as driver did not update phy link ksettings after initialization process or not update advertising when getting phy link ksettings from firmware, it may set incorrect phy link ksettings for firmware in resetting process. So fix it. Fixes: f5f2b3e4dcc0 ("net: hns3: add support for imp-controlled PHYs") Fixes: c5ef83cbb1e9 ("net: hns3: fix for phy_addr error in hclge_mac_mdio_config") Fixes: 2312e050f42b ("net: hns3: Fix for deadlock problem occurring when unregistering ae_algo") Signed-off-by: Guangbin Huang Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e34ac9c5900e..4e54f91f7a6c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3443,6 +3443,7 @@ static int hclge_update_tp_port_info(struct hclge_dev *hdev) hdev->hw.mac.autoneg = cmd.base.autoneg; hdev->hw.mac.speed = cmd.base.speed; hdev->hw.mac.duplex = cmd.base.duplex; + linkmode_copy(hdev->hw.mac.advertising, cmd.link_modes.advertising); return 0; } @@ -11586,9 +11587,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_msi_irq_uninit; - if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER && - !hnae3_dev_phy_imp_supported(hdev)) { - ret = hclge_mac_mdio_config(hdev); + if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) { + if (hnae3_dev_phy_imp_supported(hdev)) + ret = hclge_update_tp_port_info(hdev); + else + ret = hclge_mac_mdio_config(hdev); + if (ret) goto err_msi_irq_uninit; } From 9d45921ee4cb364910097e7d1b7558559c2f9fd2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Nov 2022 10:45:09 +0200 Subject: [PATCH 258/963] bridge: switchdev: Fix memory leaks when changing VLAN protocol The bridge driver can offload VLANs to the underlying hardware either via switchdev or the 8021q driver. When the former is used, the VLAN is marked in the bridge driver with the 'BR_VLFLAG_ADDED_BY_SWITCHDEV' private flag. To avoid the memory leaks mentioned in the cited commit, the bridge driver will try to delete a VLAN via the 8021q driver if the VLAN is not marked with the previously mentioned flag. When the VLAN protocol of the bridge changes, switchdev drivers are notified via the 'SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL' attribute, but the 8021q driver is also called to add the existing VLANs with the new protocol and delete them with the old protocol. In case the VLANs were offloaded via switchdev, the above behavior is both redundant and buggy. Redundant because the VLANs are already programmed in hardware and drivers that support VLAN protocol change (currently only mlx5) change the protocol upon the switchdev attribute notification. Buggy because the 8021q driver is called despite these VLANs being marked with 'BR_VLFLAG_ADDED_BY_SWITCHDEV'. This leads to memory leaks [1] when the VLANs are deleted. Fix by not calling the 8021q driver for VLANs that were already programmed via switchdev. [1] unreferenced object 0xffff8881f6771200 (size 256): comm "ip", pid 446855, jiffies 4298238841 (age 55.240s) hex dump (first 32 bytes): 00 00 7f 0e 83 88 ff ff 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000012819ac>] vlan_vid_add+0x437/0x750 [<00000000f2281fad>] __br_vlan_set_proto+0x289/0x920 [<000000000632b56f>] br_changelink+0x3d6/0x13f0 [<0000000089d25f04>] __rtnl_newlink+0x8ae/0x14c0 [<00000000f6276baf>] rtnl_newlink+0x5f/0x90 [<00000000746dc902>] rtnetlink_rcv_msg+0x336/0xa00 [<000000001c2241c0>] netlink_rcv_skb+0x11d/0x340 [<0000000010588814>] netlink_unicast+0x438/0x710 [<00000000e1a4cd5c>] netlink_sendmsg+0x788/0xc40 [<00000000e8992d4e>] sock_sendmsg+0xb0/0xe0 [<00000000621b8f91>] ____sys_sendmsg+0x4ff/0x6d0 [<000000000ea26996>] ___sys_sendmsg+0x12e/0x1b0 [<00000000684f7e25>] __sys_sendmsg+0xab/0x130 [<000000004538b104>] do_syscall_64+0x3d/0x90 [<0000000091ed9678>] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 279737939a81 ("net: bridge: Fix VLANs memory leak") Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20221114084509.860831-1-idosch@nvidia.com Signed-off-by: Paolo Abeni --- net/bridge/br_vlan.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 6e53dc991409..9ffd40b8270c 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -959,6 +959,8 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; err = vlan_vid_add(p->dev, proto, vlan->vid); if (err) goto err_filt; @@ -973,8 +975,11 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, /* Delete VLANs for the old proto from the device filter. */ list_for_each_entry(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, oldproto, vlan->vid); + } } return 0; @@ -983,13 +988,19 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto, attr.u.vlan_protocol = ntohs(oldproto); switchdev_port_attr_set(br->dev, &attr, NULL); - list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } list_for_each_entry_continue_reverse(p, &br->port_list, list) { vg = nbp_vlan_group(p); - list_for_each_entry(vlan, &vg->vlan_list, vlist) + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + if (vlan->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + continue; vlan_vid_del(p->dev, proto, vlan->vid); + } } return err; From dfd10332596ef11ceafd29c4e21b4117be423fc4 Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Mon, 14 Nov 2022 16:11:27 -0800 Subject: [PATCH 259/963] fpga: m10bmc-sec: Fix kconfig dependencies The secure update driver depends on the firmware-upload functionality of the firmware-loader. The firmware-loader is carried in the firmware-class driver which is enabled with the tristate CONFIG_FW_LOADER option. The firmware-upload functionality is included in the firmware-class driver if the bool FW_UPLOAD config is set. The current dependency statement, "depends on FW_UPLOAD", is not adequate because it does not implicitly turn on FW_LOADER. Instead of adding a dependency, follow the convention used by drivers that require the FW_LOADER_USER_HELPER functionality of the firmware-loader by using select for both FW_LOADER and FW_UPLOAD. Fixes: bdf86d0e6ca3 ("fpga: m10bmc-sec: create max10 bmc secure update") Reported-by: kernel test robot Cc: stable@vger.kernel.org Signed-off-by: Russ Weight Acked-by: Randy Dunlap Acked-by: Xu Yilun Link: https://lore.kernel.org/r/20221115001127.289890-1-russell.h.weight@intel.com Signed-off-by: Xu Yilun --- drivers/fpga/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig index 6c416955da53..bbe0a7cabb75 100644 --- a/drivers/fpga/Kconfig +++ b/drivers/fpga/Kconfig @@ -246,7 +246,9 @@ config FPGA_MGR_VERSAL_FPGA config FPGA_M10_BMC_SEC_UPDATE tristate "Intel MAX10 BMC Secure Update driver" - depends on MFD_INTEL_M10_BMC && FW_UPLOAD + depends on MFD_INTEL_M10_BMC + select FW_LOADER + select FW_UPLOAD help Secure update support for the Intel MAX10 board management controller. From bdcdd86ca94b5e9faa18d6f4d3dda660ac5c887e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 11 Nov 2022 00:54:40 +0000 Subject: [PATCH 260/963] btrfs: fix assertion failure and blocking during nowait buffered write When doing a nowait buffered write we can trigger the following assertion: [11138.437027] assertion failed: !path->nowait, in fs/btrfs/ctree.c:4658 [11138.438251] ------------[ cut here ]------------ [11138.438254] kernel BUG at fs/btrfs/messages.c:259! [11138.438762] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI [11138.439450] CPU: 4 PID: 1091021 Comm: fsstress Not tainted 6.1.0-rc4-btrfs-next-128 #1 [11138.440611] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [11138.442553] RIP: 0010:btrfs_assertfail+0x19/0x1b [btrfs] [11138.443583] Code: 5b 41 5a 41 (...) [11138.446437] RSP: 0018:ffffbaf0cf05b840 EFLAGS: 00010246 [11138.447235] RAX: 0000000000000039 RBX: ffffbaf0cf05b938 RCX: 0000000000000000 [11138.448303] RDX: 0000000000000000 RSI: ffffffffb2ef59f6 RDI: 00000000ffffffff [11138.449370] RBP: ffff9165f581eb68 R08: 00000000ffffffff R09: 0000000000000001 [11138.450493] R10: ffff9167a88421f8 R11: 0000000000000000 R12: ffff9164981b1000 [11138.451661] R13: 000000008c8f1000 R14: ffff9164991d4000 R15: ffff9164981b1000 [11138.452225] FS: 00007f1438a66440(0000) GS:ffff9167ad600000(0000) knlGS:0000000000000000 [11138.452949] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [11138.453394] CR2: 00007f1438a64000 CR3: 0000000100c36002 CR4: 0000000000370ee0 [11138.454057] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [11138.454879] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [11138.455779] Call Trace: [11138.456211] [11138.456598] btrfs_next_old_leaf.cold+0x18/0x1d [btrfs] [11138.457827] ? kmem_cache_alloc+0x18d/0x2a0 [11138.458516] btrfs_lookup_csums_range+0x149/0x4d0 [btrfs] [11138.459407] csum_exist_in_range+0x56/0x110 [btrfs] [11138.460271] can_nocow_file_extent+0x27c/0x310 [btrfs] [11138.461155] can_nocow_extent+0x1ec/0x2e0 [btrfs] [11138.461672] btrfs_check_nocow_lock+0x114/0x1c0 [btrfs] [11138.462951] btrfs_buffered_write+0x44c/0x8e0 [btrfs] [11138.463482] btrfs_do_write_iter+0x42b/0x5f0 [btrfs] [11138.463982] ? lock_release+0x153/0x4a0 [11138.464347] io_write+0x11b/0x570 [11138.464660] ? lock_release+0x153/0x4a0 [11138.465213] ? lock_is_held_type+0xe8/0x140 [11138.466003] io_issue_sqe+0x63/0x4a0 [11138.466339] io_submit_sqes+0x238/0x770 [11138.466741] __do_sys_io_uring_enter+0x37b/0xb10 [11138.467206] ? lock_is_held_type+0xe8/0x140 [11138.467879] ? syscall_enter_from_user_mode+0x1d/0x50 [11138.468688] do_syscall_64+0x38/0x90 [11138.469265] entry_SYSCALL_64_after_hwframe+0x63/0xcd [11138.470017] RIP: 0033:0x7f1438c539e6 This is because to check if we can NOCOW, we check that if we can NOCOW into an extent (it's prealloc extent or the inode has NOCOW attribute), and then check if there are csums for the extent's range in the csum tree. The search may leave us beyond the last slot of a leaf, and then when we call btrfs_next_leaf() we end up at btrfs_next_old_leaf() with a time_seq of 0. This triggers a failure of the first assertion at btrfs_next_old_leaf(), since we have a nowait path. With assertions disabled, we simply don't respect the NOWAIT semantics, allowing the write to block on locks or blocking on IO for reading an extent buffer from disk. Fix this by: 1) Triggering the assertion only if time_seq is not 0, which means that search is being done by a tree mod log user, and in the buffered and direct IO write paths we don't use the tree mod log; 2) Implementing NOWAIT semantics at btrfs_next_old_leaf(). Any failure to lock an extent buffer should return immediately and not retry the search, as well as if we need to do IO to read an extent buffer from disk. Fixes: c922b016f353 ("btrfs: assert nowait mode is not used for some btree search functions") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a9543f01184c..dcb510f38dda 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -4663,7 +4663,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, int ret; int i; - ASSERT(!path->nowait); + /* + * The nowait semantics are used only for write paths, where we don't + * use the tree mod log and sequence numbers. + */ + if (time_seq) + ASSERT(!path->nowait); nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) @@ -4683,7 +4688,14 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, if (path->need_commit_sem) { path->need_commit_sem = 0; need_commit_sem = true; - down_read(&fs_info->commit_root_sem); + if (path->nowait) { + if (!down_read_trylock(&fs_info->commit_root_sem)) { + ret = -EAGAIN; + goto done; + } + } else { + down_read(&fs_info->commit_root_sem); + } } ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); } @@ -4759,7 +4771,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, next = c; ret = read_block_for_search(root, path, &next, level, slot, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4769,6 +4781,10 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && path->nowait) { + ret = -EAGAIN; + goto done; + } if (!ret && time_seq) { /* * If we don't get the lock, we may be racing @@ -4799,7 +4815,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, ret = read_block_for_search(root, path, &next, level, 0, &key); - if (ret == -EAGAIN) + if (ret == -EAGAIN && !path->nowait) goto again; if (ret < 0) { @@ -4807,8 +4823,16 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, goto done; } - if (!path->skip_locking) - btrfs_tree_read_lock(next); + if (!path->skip_locking) { + if (path->nowait) { + if (!btrfs_try_tree_read_lock(next)) { + ret = -EAGAIN; + goto done; + } + } else { + btrfs_tree_read_lock(next); + } + } } ret = 0; done: From b740d806166979488e798e41743aaec051f2443f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 7 Nov 2022 11:44:51 -0500 Subject: [PATCH 261/963] btrfs: free btrfs_path before copying root refs to userspace Syzbot reported the following lockdep splat ====================================================== WARNING: possible circular locking dependency detected 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Not tainted ------------------------------------------------------ syz-executor307/3029 is trying to acquire lock: ffff0000c02525d8 (&mm->mmap_lock){++++}-{3:3}, at: __might_fault+0x54/0xb4 mm/memory.c:5576 but task is already holding lock: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (btrfs-root-00){++++}-{3:3}: down_read_nested+0x64/0x84 kernel/locking/rwsem.c:1624 __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 btrfs_search_slot_get_root+0x74/0x338 fs/btrfs/ctree.c:1637 btrfs_search_slot+0x1b0/0xfd8 fs/btrfs/ctree.c:1944 btrfs_update_root+0x6c/0x5a0 fs/btrfs/root-tree.c:132 commit_fs_roots+0x1f0/0x33c fs/btrfs/transaction.c:1459 btrfs_commit_transaction+0x89c/0x12d8 fs/btrfs/transaction.c:2343 flush_space+0x66c/0x738 fs/btrfs/space-info.c:786 btrfs_async_reclaim_metadata_space+0x43c/0x4e0 fs/btrfs/space-info.c:1059 process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 worker_thread+0x340/0x610 kernel/workqueue.c:2436 kthread+0x12c/0x158 kernel/kthread.c:376 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 -> #2 (&fs_info->reloc_mutex){+.+.}-{3:3}: __mutex_lock_common+0xd4/0xca8 kernel/locking/mutex.c:603 __mutex_lock kernel/locking/mutex.c:747 [inline] mutex_lock_nested+0x38/0x44 kernel/locking/mutex.c:799 btrfs_record_root_in_trans fs/btrfs/transaction.c:516 [inline] start_transaction+0x248/0x944 fs/btrfs/transaction.c:752 btrfs_start_transaction+0x34/0x44 fs/btrfs/transaction.c:781 btrfs_create_common+0xf0/0x1b4 fs/btrfs/inode.c:6651 btrfs_create+0x8c/0xb0 fs/btrfs/inode.c:6697 lookup_open fs/namei.c:3413 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x804/0x11c4 fs/namei.c:3688 do_filp_open+0xdc/0x1b8 fs/namei.c:3718 do_sys_openat2+0xb8/0x22c fs/open.c:1313 do_sys_open fs/open.c:1329 [inline] __do_sys_openat fs/open.c:1345 [inline] __se_sys_openat fs/open.c:1340 [inline] __arm64_sys_openat+0xb0/0xe0 fs/open.c:1340 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 -> #1 (sb_internal#2){.+.+}-{0:0}: percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write include/linux/fs.h:1826 [inline] sb_start_intwrite include/linux/fs.h:1948 [inline] start_transaction+0x360/0x944 fs/btrfs/transaction.c:683 btrfs_join_transaction+0x30/0x40 fs/btrfs/transaction.c:795 btrfs_dirty_inode+0x50/0x140 fs/btrfs/inode.c:6103 btrfs_update_time+0x1c0/0x1e8 fs/btrfs/inode.c:6145 inode_update_time fs/inode.c:1872 [inline] touch_atime+0x1f0/0x4a8 fs/inode.c:1945 file_accessed include/linux/fs.h:2516 [inline] btrfs_file_mmap+0x50/0x88 fs/btrfs/file.c:2407 call_mmap include/linux/fs.h:2192 [inline] mmap_region+0x7fc/0xc14 mm/mmap.c:1752 do_mmap+0x644/0x97c mm/mmap.c:1540 vm_mmap_pgoff+0xe8/0x1d0 mm/util.c:552 ksys_mmap_pgoff+0x1cc/0x278 mm/mmap.c:1586 __do_sys_mmap arch/arm64/kernel/sys.c:28 [inline] __se_sys_mmap arch/arm64/kernel/sys.c:21 [inline] __arm64_sys_mmap+0x58/0x6c arch/arm64/kernel/sys.c:21 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 -> #0 (&mm->mmap_lock){++++}-{3:3}: check_prev_add kernel/locking/lockdep.c:3095 [inline] check_prevs_add kernel/locking/lockdep.c:3214 [inline] validate_chain kernel/locking/lockdep.c:3829 [inline] __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 __might_fault+0x7c/0xb4 mm/memory.c:5577 _copy_to_user include/linux/uaccess.h:134 [inline] copy_to_user include/linux/uaccess.h:160 [inline] btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 other info that might help us debug this: Chain exists of: &mm->mmap_lock --> &fs_info->reloc_mutex --> btrfs-root-00 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(btrfs-root-00); lock(&fs_info->reloc_mutex); lock(btrfs-root-00); lock(&mm->mmap_lock); *** DEADLOCK *** 1 lock held by syz-executor307/3029: #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock fs/btrfs/locking.c:134 [inline] #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_tree_read_lock fs/btrfs/locking.c:140 [inline] #0: ffff0000c958a608 (btrfs-root-00){++++}-{3:3}, at: btrfs_read_lock_root_node+0x13c/0x1c0 fs/btrfs/locking.c:279 stack backtrace: CPU: 0 PID: 3029 Comm: syz-executor307 Not tainted 6.0.0-rc7-syzkaller-18095-gbbed346d5a96 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/30/2022 Call trace: dump_backtrace+0x1c4/0x1f0 arch/arm64/kernel/stacktrace.c:156 show_stack+0x2c/0x54 arch/arm64/kernel/stacktrace.c:163 __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x104/0x16c lib/dump_stack.c:106 dump_stack+0x1c/0x58 lib/dump_stack.c:113 print_circular_bug+0x2c4/0x2c8 kernel/locking/lockdep.c:2053 check_noncircular+0x14c/0x154 kernel/locking/lockdep.c:2175 check_prev_add kernel/locking/lockdep.c:3095 [inline] check_prevs_add kernel/locking/lockdep.c:3214 [inline] validate_chain kernel/locking/lockdep.c:3829 [inline] __lock_acquire+0x1530/0x30a4 kernel/locking/lockdep.c:5053 lock_acquire+0x100/0x1f8 kernel/locking/lockdep.c:5666 __might_fault+0x7c/0xb4 mm/memory.c:5577 _copy_to_user include/linux/uaccess.h:134 [inline] copy_to_user include/linux/uaccess.h:160 [inline] btrfs_ioctl_get_subvol_rootref+0x3a8/0x4bc fs/btrfs/ioctl.c:3203 btrfs_ioctl+0xa08/0xa64 fs/btrfs/ioctl.c:5556 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __arm64_sys_ioctl+0xd0/0x140 fs/ioctl.c:856 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] invoke_syscall arch/arm64/kernel/syscall.c:52 [inline] el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581 We do generally the right thing here, copying the references into a temporary buffer, however we are still holding the path when we do copy_to_user from the temporary buffer. Fix this by freeing the path before we copy to user space. Reported-by: syzbot+4ef9e52e464c6ff47d9d@syzkaller.appspotmail.com CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d5dd8bed1488..89b8d14cb68c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3194,6 +3194,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } out: + btrfs_free_path(path); + if (!ret || ret == -EOVERFLOW) { rootrefs->num_items = found; /* update min_treeid for next search */ @@ -3205,7 +3207,6 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root, } kfree(rootrefs); - btrfs_free_path(path); return ret; } From eca13f3c67b6ddfcc61fdb9bb1c5f9a7724e2359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Nov 2022 19:54:09 +0100 Subject: [PATCH 262/963] drm/amdgpu: use the last IB as gang leader v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turned out that not the last IB specified is the gang leader, but instead the last job allocated. This is a bit unfortunate and not very intuitive for the CS interface, so try to fix this. Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20221115094206.6181-1-christian.koenig@amd.com Tested-by: Timur Kristóf Acked-by: Timur Kristóf Reviewed-by: Alex Deucher Fixes: 4624459c84d7 ("drm/amdgpu: add gang submit frontend v6") --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 23 ++++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 1bbd39b3b0fc..fbdf139cf497 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, return r; ++(num_ibs[r]); + p->gang_leader_idx = r; return 0; } @@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (ret) goto free_all_kdata; } - p->gang_leader = p->jobs[p->gang_size - 1]; + p->gang_leader = p->jobs[p->gang_leader_idx]; if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) { ret = -ECANCELED; @@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - for (i = 0; i < p->gang_size - 1; ++i) { + for (i = 0; i < p->gang_size; ++i) { + if (p->jobs[i] == leader) + continue; + r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); if (r) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]); + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); if (r && r != -ERESTARTSYS) DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; } @@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, for (i = 0; i < p->gang_size; ++i) drm_sched_job_arm(&p->jobs[i]->base); - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { struct dma_fence *fence; + if (p->jobs[i] == leader) + continue; + fence = &p->jobs[i]->base.s_fence->scheduled; r = amdgpu_sync_fence(&leader->sync, fence); if (r) @@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, list_for_each_entry(e, &p->validated, tv.head) { /* Everybody except for the gang leader uses READ */ - for (i = 0; i < (p->gang_size - 1); ++i) { + for (i = 0; i < p->gang_size; ++i) { + if (p->jobs[i] == leader) + continue; + dma_resv_add_fence(e->tv.bo->base.resv, &p->jobs[i]->base.s_fence->finished, DMA_RESV_USAGE_READ); @@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, e->tv.num_shared = 0; } - seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1], + seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], p->fence); amdgpu_cs_post_dependencies(p); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index cbaa19b2b8a3..f80adf9069ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -54,6 +54,7 @@ struct amdgpu_cs_parser { /* scheduler job objects */ unsigned int gang_size; + unsigned int gang_leader_idx; struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *jobs[AMDGPU_CS_GANG_SIZE]; struct amdgpu_job *gang_leader; From 47df8a2f78bc34ff170d147d05b121f84e252b85 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 14 Nov 2022 17:57:33 +0800 Subject: [PATCH 263/963] bpf, perf: Use subprog name when reporting subprog ksymbol Since commit bfea9a8574f3 ("bpf: Add name to struct bpf_ksym"), when reporting subprog ksymbol to perf, prog name instead of subprog name is used. The backtrace of bpf program with subprogs will be incorrect as shown below: ffffffffc02deace bpf_prog_e44a3057dcb151f8_overwrite+0x66 ffffffffc02de9f7 bpf_prog_e44a3057dcb151f8_overwrite+0x9f ffffffffa71d8d4e trace_call_bpf+0xce ffffffffa71c2938 perf_call_bpf_enter.isra.0+0x48 overwrite is the entry program and it invokes the overwrite_htab subprog through bpf_loop, but in above backtrace, overwrite program just jumps inside itself. Fixing it by using subprog name when reporting subprog ksymbol. After the fix, the output of perf script will be correct as shown below: ffffffffc031aad2 bpf_prog_37c0bec7d7c764a4_overwrite_htab+0x66 ffffffffc031a9e7 bpf_prog_c7eb827ef4f23e71_overwrite+0x9f ffffffffa3dd8d4e trace_call_bpf+0xce ffffffffa3dc2938 perf_call_bpf_enter.isra.0+0x48 Fixes: bfea9a8574f3 ("bpf: Add name to struct bpf_ksym") Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20221114095733.158588-1-houtao@huaweicloud.com --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4ec3717003d5..8b50ef2569d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9030,7 +9030,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } From a7a1598189228b5007369a9622ccdf587be0730f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Nov 2022 16:16:43 +0300 Subject: [PATCH 264/963] drbd: use after free in drbd_create_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drbd_destroy_connection() frees the "connection" so use the _safe() iterator to prevent a use after free. Fixes: b6f85ef9538b ("drbd: Iterate over all connections") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/Y3Jd5iZRbNQ9w6gm@kili Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f3e4db16fd07..8532b839a343 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2672,7 +2672,7 @@ static int init_submitter(struct drbd_device *device) enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) { struct drbd_resource *resource = adm_ctx->resource; - struct drbd_connection *connection; + struct drbd_connection *connection, *n; struct drbd_device *device; struct drbd_peer_device *peer_device, *tmp_peer_device; struct gendisk *disk; @@ -2789,7 +2789,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig return NO_ERROR; out_idr_remove_from_resource: - for_each_connection(connection, resource) { + for_each_connection_safe(connection, n, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); if (peer_device) kref_put(&connection->kref, drbd_destroy_connection); From 2afac81dd16544d825f309fd992d2af6304353df Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 3 Nov 2022 16:57:42 +0100 Subject: [PATCH 265/963] HID: fix I2C_HID not selected when I2C_HID_OF_ELAN is When I2C_HID_OF_ELAN is set, we need to turn on I2C_HID_CORE to ensure we get all the HID requirements. Fixes: bd3cba00dcc6 ("HID: i2c-hid: elan: Add support for Elan eKTH6915 i2c-hid touchscreens") Reported-by: kernel test robot Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig index 5273ee2bb134..d65abe65ce73 100644 --- a/drivers/hid/i2c-hid/Kconfig +++ b/drivers/hid/i2c-hid/Kconfig @@ -66,6 +66,6 @@ endmenu config I2C_HID_CORE tristate - default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y - default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m + default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_ELAN=y || I2C_HID_OF_GOODIX=y + default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_ELAN=m || I2C_HID_OF_GOODIX=m select HID From be7d172b0f911e356a1cba5ea96f8e5a116bb7cb Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 9 Nov 2022 21:22:17 +0000 Subject: [PATCH 266/963] MAINTAINERS: add entries for misc. RISC-V SoC drivers and devicetrees Following some discussion both on & off list, I have volunteered to take over maintaining the miscellaneous RISC-V devicetrees & soc drivers from Palmer to ease his load. So far only SiFive and Microchip have stuff in drivers/soc. For the former, a SiFive entry exists with a dead GitHub repo - so remove that to avoid confusion since the patches for drivers/soc & devicetrees will be routed via my tree & other drivers go through their subsystem trees. The Microchip directory only contains a RISC-V driver for now, but is likely to contain drivers for other archs in the future. To that end, change the PolarFire SoC entry to specifically mention the RISC-V driver & the new directory level entry does not mention an architecture. CC: Arnd Bergmann CC: Nicolas Ferre CC: Palmer Dabbelt Link: https://lore.kernel.org/linux-riscv/mhng-e4210f56-fcc3-4db8-abdb-d43b3ebe695d@palmer-ri-x1c9a/ Signed-off-by: Conor Dooley Acked-by: Nicolas Ferre Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221109212219.1598355-2-conor@kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index d3b729b3cbfc..f411c4f136d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13622,6 +13622,12 @@ S: Supported F: drivers/misc/atmel-ssc.c F: include/linux/atmel-ssc.h +MICROCHIP SOC DRIVERS +M: Conor Dooley +S: Supported +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: drivers/soc/microchip/ + MICROCHIP USB251XB DRIVER M: Richard Leitner L: linux-usb@vger.kernel.org @@ -17742,12 +17748,21 @@ F: drivers/mailbox/mailbox-mpfs.c F: drivers/pci/controller/pcie-microchip-host.c F: drivers/reset/reset-mpfs.c F: drivers/rtc/rtc-mpfs.c -F: drivers/soc/microchip/ +F: drivers/soc/microchip/mpfs-sys-controller.c F: drivers/spi/spi-microchip-core-qspi.c F: drivers/spi/spi-microchip-core.c F: drivers/usb/musb/mpfs.c F: include/soc/microchip/mpfs.h +RISC-V MISC SOC SUPPORT +M: Conor Dooley +L: linux-riscv@lists.infradead.org +S: Maintained +Q: https://patchwork.kernel.org/project/linux-riscv/list/ +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: Documentation/devicetree/bindings/riscv/ +F: arch/riscv/boot/dts/ + RNBD BLOCK DRIVERS M: Md. Haris Iqbal M: Jack Wang @@ -18785,7 +18800,6 @@ M: Palmer Dabbelt M: Paul Walmsley L: linux-riscv@lists.infradead.org S: Supported -T: git git://github.com/sifive/riscv-linux.git N: sifive K: [^@]sifive @@ -18804,6 +18818,13 @@ S: Maintained F: Documentation/devicetree/bindings/dma/sifive,fu540-c000-pdma.yaml F: drivers/dma/sf-pdma/ +SIFIVE SOC DRIVERS +M: Conor Dooley +L: linux-riscv@lists.infradead.org +S: Maintained +T: git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/ +F: drivers/soc/sifive/ + SILEAD TOUCHSCREEN DRIVER M: Hans de Goede L: linux-input@vger.kernel.org From 5836bf833a763e1573ce6fe5d0c310af23ebd49d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 9 Nov 2022 21:22:18 +0000 Subject: [PATCH 267/963] MAINTAINERS: generify the Microchip RISC-V entry name These drivers work on our other FPGAs, for example the non-SoC PolarFire connected to an FU-540 via chiplink. Make the entry a wee bit more generic to match. While at it, remove the / from the heading so that it matches other, neighbouring RISC-V entries. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221109212219.1598355-3-conor@kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f411c4f136d0..c718a8be1297 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17725,7 +17725,7 @@ F: arch/riscv/ N: riscv K: riscv -RISC-V/MICROCHIP POLARFIRE SOC SUPPORT +RISC-V MICROCHIP FPGA SUPPORT M: Conor Dooley M: Daire McNamara L: linux-riscv@lists.infradead.org From 71bbc3c32e5a60c9dfc14632892744679adea76c Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 9 Nov 2022 21:22:19 +0000 Subject: [PATCH 268/963] MAINTAINERS: add an entry for StarFive devicetrees Emil looks after the downstream StarFive stuff, and agreed to look after the upstream ones too. CC: Emil Renner Berthing Signed-off-by: Conor Dooley Acked-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20221109212219.1598355-4-conor@kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c718a8be1297..4a986c8fced0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19626,6 +19626,11 @@ M: Ion Badulescu S: Odd Fixes F: drivers/net/ethernet/adaptec/starfire* +STARFIVE DEVICETREES +M: Emil Renner Berthing +S: Maintained +F: arch/riscv/boot/dts/starfive/ + STARFIVE JH7100 CLOCK DRIVERS M: Emil Renner Berthing S: Maintained From 6827df0911c3042a60eb0a462bf7a620f9e33879 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 9 Nov 2022 21:22:20 +0000 Subject: [PATCH 269/963] MAINTAINERS: repair Microchip corei2c driver entry The driver was renamed before application but the relevant change did not propagate to the MAINTAINERS patch that was applied. Repair it. CC: Lukas Bulwahn Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221109212219.1598355-5-conor@kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4a986c8fced0..354cc6639972 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17743,7 +17743,7 @@ F: Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml F: arch/riscv/boot/dts/microchip/ F: drivers/char/hw_random/mpfs-rng.c F: drivers/clk/microchip/clk-mpfs.c -F: drivers/i2c/busses/i2c-microchip-core.c +F: drivers/i2c/busses/i2c-microchip-corei2c.c F: drivers/mailbox/mailbox-mpfs.c F: drivers/pci/controller/pcie-microchip-host.c F: drivers/reset/reset-mpfs.c From 6412518f5cb953cbd84b6746db9741bfc92be40a Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 9 Nov 2022 14:03:46 +0530 Subject: [PATCH 270/963] platform/x86/amd: pmc: Add new ACPI ID AMDI0009 Add new a new ACPI ID AMDI0009 used by upcoming AMD platform to the pmc supported list of devices. Cc: stable@vger.kernel.org # 6.0 Signed-off-by: Shyam Sundar S K Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221109083346.361603-1-Shyam-sundar.S-k@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 3b44f2fff5be..ef4ae977b8e0 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -962,6 +962,7 @@ static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0006", 0}, {"AMDI0007", 0}, {"AMDI0008", 0}, + {"AMDI0009", 0}, {"AMD0004", 0}, {"AMD0005", 0}, { } From 53e16a6e3e69425081f8352e13e9fd23bf1abfca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lennard=20G=C3=A4her?= Date: Tue, 8 Nov 2022 08:20:23 +0100 Subject: [PATCH 271/963] platform/x86: thinkpad_acpi: Enable s2idle quirk for 21A1 machine type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the s2idle quirk was only active for the 21A0 machine type of the P14s Gen2a product. This also enables it for the second 21A1 type, thus reducing wake-up times from s2idle. Signed-off-by: Lennard Gäher Suggested-by: Mario Limonciello Reviewed-by: Mario Limonciello Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2181 Link: https://lore.kernel.org/r/20221108072023.17069-1-gaeher@mpi-sws.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 20e5c043a8e8..8476dfef4e62 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4497,6 +4497,14 @@ static const struct dmi_system_id fwbug_list[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "21A0"), } }, + { + .ident = "P14s Gen2 AMD", + .driver_data = &quirk_s2idle_bug, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21A1"), + } + }, {} }; From 2dbfb3f33350e1e868d3d7ed4c176d8777150878 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= Date: Thu, 10 Nov 2022 17:31:44 +0100 Subject: [PATCH 272/963] platform/x86/intel: pmc: Don't unconditionally attach Intel PMC when virtualized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current logic in the Intel PMC driver will forcefully attach it when detecting any CPU on the intel_pmc_core_platform_ids array, even if the matching ACPI device is not present. There's no checking in pmc_core_probe() to assert that the PMC device is present, and hence on virtualized environments the PMC device probes successfully, even if the underlying registers are not present. Before commit 21ae43570940 ("platform/x86: intel_pmc_core: Substitute PCI with CPUID enumeration") the driver would check for the presence of a specific PCI device, and that prevented the driver from attaching when running virtualized. Fix by only forcefully attaching the PMC device when not running virtualized. Note that virtualized platforms can still get the device to load if the appropriate ACPI device is present on the tables provided to the VM. Make an exception for the Xen initial domain, which does have full hardware access, and hence can attach to the PMC if present. Fixes: 21ae43570940 ("platform/x86: intel_pmc_core: Substitute PCI with CPUID enumeration") Signed-off-by: Roger Pau Monné Acked-by: David E. Box Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221110163145.80374-1-roger.pau@citrix.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/pmc/pltdrv.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/pltdrv.c b/drivers/platform/x86/intel/pmc/pltdrv.c index 15ca8afdd973..ddfba38c2104 100644 --- a/drivers/platform/x86/intel/pmc/pltdrv.c +++ b/drivers/platform/x86/intel/pmc/pltdrv.c @@ -18,6 +18,8 @@ #include #include +#include + static void intel_pmc_core_release(struct device *dev) { kfree(dev); @@ -53,6 +55,13 @@ static int __init pmc_core_platform_init(void) if (acpi_dev_present("INT33A1", NULL, -1)) return -ENODEV; + /* + * Skip forcefully attaching the device for VMs. Make an exception for + * Xen dom0, which does have full hardware access. + */ + if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR) && !xen_initial_domain()) + return -ENODEV; + if (!x86_match_cpu(intel_pmc_core_platform_ids)) return -ENODEV; From 418ffb9e3cf6c4e2574d3a732b724916684bd133 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:28 +0530 Subject: [PATCH 273/963] btrfs: free btrfs_path before copying inodes to userspace btrfs_ioctl_logical_to_ino() frees the search path after the userspace copy from the temp buffer @inodes. Which potentially can lead to a lock splat. Fix this by freeing the path before we copy @inodes to userspace. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 89b8d14cb68c..b595f2c6dfc9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4282,21 +4282,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, size = min_t(u32, loi->size, SZ_16M); } + inodes = init_data_container(size); + if (IS_ERR(inodes)) { + ret = PTR_ERR(inodes); + goto out_loi; + } + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } - - inodes = init_data_container(size); - if (IS_ERR(inodes)) { - ret = PTR_ERR(inodes); - inodes = NULL; - goto out; - } - ret = iterate_inodes_from_logical(loi->logical, fs_info, path, inodes, ignore_offset); + btrfs_free_path(path); if (ret == -EINVAL) ret = -ENOENT; if (ret < 0) @@ -4308,7 +4307,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, ret = -EFAULT; out: - btrfs_free_path(path); kvfree(inodes); out_loi: kfree(loi); From 8cf96b409d9b3946ece58ced13f92d0f775b0442 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:29 +0530 Subject: [PATCH 274/963] btrfs: free btrfs_path before copying fspath to userspace btrfs_ioctl_ino_to_path() frees the search path after the userspace copy from the temp buffer @ipath->fspath. Which potentially can lead to a lock splat warning. Fix this by freeing the path before we copy it to userspace. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b595f2c6dfc9..df5b893494fa 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4232,6 +4232,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) ipath->fspath->val[i] = rel_ptr; } + btrfs_free_path(path); + path = NULL; ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, ipath->fspath, size); if (ret) { From 013c1c5585ebcfb19c88efe79063d0463b1b6159 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 10 Nov 2022 11:36:31 +0530 Subject: [PATCH 275/963] btrfs: free btrfs_path before copying subvol info to userspace btrfs_ioctl_get_subvol_info() frees the search path after the userspace copy from the temp buffer @subvol_info. This can lead to a lock splat warning. Fix this by freeing the path before we copy it to userspace. CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index df5b893494fa..5ba2e810dc6e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3105,6 +3105,8 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp) } } + btrfs_free_path(path); + path = NULL; if (copy_to_user(argp, subvol_info, sizeof(*subvol_info))) ret = -EFAULT; From d0cdd85046b15089df71a50548617ac1025300d0 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 11 Nov 2022 18:07:52 +0800 Subject: [PATCH 276/963] platform/x86: asus-wmi: add missing pci_dev_put() in asus_wmi_set_xusb2pr() pci_get_device() will increase the reference count for the returned pci_dev. We need to use pci_dev_put() to decrease the reference count before asus_wmi_set_xusb2pr() returns. Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221111100752.134311-1-wangxiongfeng2@huawei.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/asus-wmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 6e8e093f96b3..872efc1d5b36 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -1738,6 +1738,8 @@ static void asus_wmi_set_xusb2pr(struct asus_wmi *asus) pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, cpu_to_le32(ports_available)); + pci_dev_put(xhci_pdev); + pr_info("set USB_INTEL_XUSB2PR old: 0x%04x, new: 0x%04x\n", orig_ports_available, ports_available); } From 1e817b889c7d8c14e7005258e15fec62edafe03c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 11 Nov 2022 12:16:39 +0100 Subject: [PATCH 277/963] platform/x86: acer-wmi: Enable SW_TABLET_MODE on Switch V 10 (SW5-017) Like the Acer Switch 10 (SW5-012) and Acer Switch 10 (S1003) models the Acer Switch V 10 (SW5-017) supports reporting SW_TABLET_MODE through acer-wmi. Add a DMI quirk for the SW5-017 setting force_caps to ACER_CAP_KBD_DOCK (these devices have no other acer-wmi based functionality). Cc: Rudolf Polzer Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221111111639.35730-1-hdegoede@redhat.com --- drivers/platform/x86/acer-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 18224f9a5bc0..ee67efdd5499 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -564,6 +564,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = { }, .driver_data = (void *)ACER_CAP_KBD_DOCK, }, + { + .callback = set_force_caps, + .ident = "Acer Aspire Switch V 10 SW5-017", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"), + }, + .driver_data = (void *)ACER_CAP_KBD_DOCK, + }, { .callback = set_force_caps, .ident = "Acer One 10 (S1003)", From d9a477f643eb3de71fbea5ae6103b800ceb8f547 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 13 Nov 2022 19:59:50 +0100 Subject: [PATCH 278/963] platform/surface: aggregator: Do not check for repeated unsequenced packets Currently, we check any received packet whether we have already seen it previously, regardless of the packet type (sequenced / unsequenced). We do this by checking the sequence number. This assumes that sequence numbers are valid for both sequenced and unsequenced packets. However, this assumption appears to be incorrect. On some devices, the sequence number field of unsequenced packets (in particular HID input events on the Surface Pro 9) is always zero. As a result, the current retransmission check kicks in and discards all but the first unsequenced packet, breaking (among other things) keyboard and touchpad input. Note that we have, so far, only seen packets being retransmitted in sequenced communication. In particular, this happens when there is an ACK timeout, causing the EC (or us) to re-send the packet waiting for an ACK. Arguably, retransmission / duplication of unsequenced packets should not be an issue as there is no logical condition (such as an ACK timeout) to determine when a packet should be sent again. Therefore, remove the retransmission check for unsequenced packets entirely to resolve the issue. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221113185951.224759-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../surface/aggregator/ssh_packet_layer.c | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/platform/surface/aggregator/ssh_packet_layer.c b/drivers/platform/surface/aggregator/ssh_packet_layer.c index 6748fe4ac5d5..def8d7ac541f 100644 --- a/drivers/platform/surface/aggregator/ssh_packet_layer.c +++ b/drivers/platform/surface/aggregator/ssh_packet_layer.c @@ -1596,16 +1596,32 @@ static void ssh_ptl_timeout_reap(struct work_struct *work) ssh_ptl_tx_wakeup_packet(ptl); } -static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq) +static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, const struct ssh_frame *frame) { int i; + /* + * Ignore unsequenced packets. On some devices (notably Surface Pro 9), + * unsequenced events will always be sent with SEQ=0x00. Attempting to + * detect retransmission would thus just block all events. + * + * While sequence numbers would also allow detection of retransmitted + * packets in unsequenced communication, they have only ever been used + * to cover edge-cases in sequenced transmission. In particular, the + * only instance of packets being retransmitted (that we are aware of) + * is due to an ACK timeout. As this does not happen in unsequenced + * communication, skip the retransmission check for those packets + * entirely. + */ + if (frame->type == SSH_FRAME_TYPE_DATA_NSQ) + return false; + /* * Check if SEQ has been seen recently (i.e. packet was * re-transmitted and we should ignore it). */ for (i = 0; i < ARRAY_SIZE(ptl->rx.blocked.seqs); i++) { - if (likely(ptl->rx.blocked.seqs[i] != seq)) + if (likely(ptl->rx.blocked.seqs[i] != frame->seq)) continue; ptl_dbg(ptl, "ptl: ignoring repeated data packet\n"); @@ -1613,7 +1629,7 @@ static bool ssh_ptl_rx_retransmit_check(struct ssh_ptl *ptl, u8 seq) } /* Update list of blocked sequence IDs. */ - ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = seq; + ptl->rx.blocked.seqs[ptl->rx.blocked.offset] = frame->seq; ptl->rx.blocked.offset = (ptl->rx.blocked.offset + 1) % ARRAY_SIZE(ptl->rx.blocked.seqs); @@ -1624,7 +1640,7 @@ static void ssh_ptl_rx_dataframe(struct ssh_ptl *ptl, const struct ssh_frame *frame, const struct ssam_span *payload) { - if (ssh_ptl_rx_retransmit_check(ptl, frame->seq)) + if (ssh_ptl_rx_retransmit_check(ptl, frame)) return; ptl->ops.data_received(ptl, payload); From d076f30957b1d026e9f6340691624926db0d369d Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 13 Nov 2022 19:59:51 +0100 Subject: [PATCH 279/963] platform/surface: aggregator_registry: Add support for Surface Pro 9 Add device nodes to enable support for battery and charger status, the ACPI platform profile, as well as internal and type-cover HID devices (including sensors, touchpad, keyboard, and other miscellaneous devices) on the Surface Pro 9. This does not include support for a tablet-mode switch yet, as that is now handled via the POS subsystem (unlike the Surface Pro 8, where it is handled via the KIP subsystem) and therefore needs further changes. While we're at it, also add the missing comment for the Surface Pro 8. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221113185951.224759-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- .../surface/surface_aggregator_registry.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 585911020cea..db82c2a7c567 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -268,6 +268,7 @@ static const struct software_node *ssam_node_group_sp7[] = { NULL, }; +/* Devices for Surface Pro 8 */ static const struct software_node *ssam_node_group_sp8[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -284,6 +285,23 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; +/* Devices for Surface Pro 9 */ +static const struct software_node *ssam_node_group_sp9[] = { + &ssam_node_root, + &ssam_node_hub_kip, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_pprof, + /* TODO: Tablet mode switch (via POS subsystem) */ + &ssam_node_hid_kip_keyboard, + &ssam_node_hid_kip_penstash, + &ssam_node_hid_kip_touchpad, + &ssam_node_hid_kip_fwupd, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* -- SSAM platform/meta-hub driver. ---------------------------------------- */ @@ -303,6 +321,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 8 */ { "MSHW0263", (unsigned long)ssam_node_group_sp8 }, + /* Surface Pro 9 */ + { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, From 7e043a80b5dae5c2d2cf84031501de7827fd6c00 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 3 Nov 2022 16:08:14 +0000 Subject: [PATCH 280/963] netfs: Fix missing xas_retry() calls in xarray iteration netfslib has a number of places in which it performs iteration of an xarray whilst being under the RCU read lock. It *should* call xas_retry() as the first thing inside of the loop and do "continue" if it returns true in case the xarray walker passed out a special value indicating that the walk needs to be redone from the root[*]. Fix this by adding the missing retry checks. [*] I wonder if this should be done inside xas_find(), xas_next_node() and suchlike, but I'm told that's not an simple change to effect. This can cause an oops like that below. Note the faulting address - this is an internal value (|0x2) returned from xarray. BUG: kernel NULL pointer dereference, address: 0000000000000402 ... RIP: 0010:netfs_rreq_unlock+0xef/0x380 [netfs] ... Call Trace: netfs_rreq_assess+0xa6/0x240 [netfs] netfs_readpage+0x173/0x3b0 [netfs] ? init_wait_var_entry+0x50/0x50 filemap_read_page+0x33/0xf0 filemap_get_pages+0x2f2/0x3f0 filemap_read+0xaa/0x320 ? do_filp_open+0xb2/0x150 ? rmqueue+0x3be/0xe10 ceph_read_iter+0x1fe/0x680 [ceph] ? new_sync_read+0x115/0x1a0 new_sync_read+0x115/0x1a0 vfs_read+0xf3/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Changes: ======== ver #2) - Changed an unsigned int to a size_t to reduce the likelihood of an overflow as per Willy's suggestion. - Added an additional patch to fix the maths. Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Reported-by: George Law Signed-off-by: David Howells Reviewed-by: Jeff Layton Reviewed-by: Jingbo Xu cc: Matthew Wilcox cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/166749229733.107206.17482609105741691452.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/166757987929.950645.12595273010425381286.stgit@warthog.procyon.org.uk/ # v2 --- fs/netfs/buffered_read.c | 9 +++++++-- fs/netfs/io.c | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0ce535852151..baf668fb4315 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -46,10 +46,15 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; - unsigned int pgend = pgpos + folio_size(folio); + unsigned int pgpos, pgend; bool pg_failed = false; + if (xas_retry(&xas, folio)) + continue; + + pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; + pgend = pgpos + folio_size(folio); + for (;;) { if (!subreq) { pg_failed = true; diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 428925899282..e374767d1b68 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -121,6 +121,9 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + /* We might have multiple writes from the same huge * folio, but we mustn't unlock a folio more than once. */ From 5e51c627c5acbcf82bb552e17533a79d2a6a2600 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 4 Nov 2022 15:36:49 +0000 Subject: [PATCH 281/963] netfs: Fix dodgy maths Fix the dodgy maths in netfs_rreq_unlock_folios(). start_page could be inside the folio, in which case the calculation of pgpos will be come up with a negative number (though for the moment rreq->start is rounded down earlier and folios would have to get merged whilst locked) Alter how this works to just frame the tracking in terms of absolute file positions, rather than offsets from the start of the I/O request. This simplifies the maths and makes it easier to follow. Fix the issue by using folio_pos() and folio_size() to calculate the end position of the page. Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Reported-by: Matthew Wilcox Signed-off-by: David Howells Reviewed-by: Jeff Layton Reviewed-by: Jingbo Xu cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/Y2SJw7w1IsIik3nb@casper.infradead.org/ Link: https://lore.kernel.org/r/166757988611.950645.7626959069846893164.stgit@warthog.procyon.org.uk/ # v2 --- fs/netfs/buffered_read.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index baf668fb4315..7679a68e8193 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -17,9 +17,9 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct folio *folio; - unsigned int iopos, account = 0; pgoff_t start_page = rreq->start / PAGE_SIZE; pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + size_t account = 0; bool subreq_failed = false; XA_STATE(xas, &rreq->mapping->i_pages, start_page); @@ -39,23 +39,23 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) */ subreq = list_first_entry(&rreq->subrequests, struct netfs_io_subrequest, rreq_link); - iopos = 0; subreq_failed = (subreq->error < 0); trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); rcu_read_lock(); xas_for_each(&xas, folio, last_page) { - unsigned int pgpos, pgend; + loff_t pg_end; bool pg_failed = false; if (xas_retry(&xas, folio)) continue; - pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; - pgend = pgpos + folio_size(folio); + pg_end = folio_pos(folio) + folio_size(folio) - 1; for (;;) { + loff_t sreq_end; + if (!subreq) { pg_failed = true; break; @@ -63,11 +63,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) folio_start_fscache(folio); pg_failed |= subreq_failed; - if (pgend < iopos + subreq->len) + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) break; account += subreq->transferred; - iopos += subreq->len; if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { subreq = list_next_entry(subreq, rreq_link); subreq_failed = (subreq->error < 0); @@ -75,7 +75,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) subreq = NULL; subreq_failed = false; } - if (pgend == iopos) + + if (pg_end == sreq_end) break; } From b18a456330e1c1ca207b57b45872f10336741388 Mon Sep 17 00:00:00 2001 From: Emil Flink Date: Tue, 15 Nov 2022 15:45:01 +0100 Subject: [PATCH 282/963] ALSA: hda/realtek: fix speakers for Samsung Galaxy Book Pro The Samsung Galaxy Book Pro seems to have the same issue as a few other Samsung laptops, detailed in kernel bug report 207423. Sound from headphone jack works, but not the built-in speakers. alsa-info: http://alsa-project.org/db/?f=b40ba609dc6ae28dc84ad404a0d8a4bbcd8bea6d Signed-off-by: Emil Flink Cc: Link: https://lore.kernel.org/r/20221115144500.7782-1-emil.flink@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e18499dd14f0..0abf485c0192 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9436,6 +9436,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), From 1abfd71ee8f3ed99c5d0df5d9843a360541d6808 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Nov 2022 18:02:35 +0100 Subject: [PATCH 283/963] ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book Pro 360 Samsung Galaxy Book Pro 360 (13" 2021 NP930QBD-ke1US) with codec SSID 144d:c1a6 requires the same workaround for enabling the speaker amp like other Samsung models with ALC298 codec. Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1205100 Cc: Link: https://lore.kernel.org/r/20221115170235.18875-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0abf485c0192..e5c036385666 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9437,6 +9437,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc189, "Samsung Galaxy Flex Book (NT950QCG-X716)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc18a, "Samsung Galaxy Book Ion (NP930XCJ-K01US)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc1a3, "Samsung Galaxy Book Pro (NP935XDB-KC1SE)", ALC298_FIXUP_SAMSUNG_AMP), + SND_PCI_QUIRK(0x144d, 0xc1a6, "Samsung Galaxy Book Pro 360 (NP930QBD)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8), SND_PCI_QUIRK(0x144d, 0xc812, "Samsung Notebook Pen S (NT950SBE-X58)", ALC298_FIXUP_SAMSUNG_AMP), SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP), From 5428672d39b7750310cf4e328e9f94a7668b5565 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Thu, 10 Nov 2022 22:33:14 +0800 Subject: [PATCH 284/963] drm/amdgpu: Fix memory leak in amdgpu_cs_pass1 When p->gang_size equals 0, amdgpu_cs_pass1() will return directly without freeing chunk_array, which will cause a memory leak issue, this patch fixes it. Fixes: 4624459c84d7 ("drm/amdgpu: add gang submit frontend v6") Reviewed-by: Luben Tuikov Signed-off-by: Dong Chenchen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d371000a5727..255d545e5524 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -287,8 +287,10 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - if (!p->gang_size) - return -EINVAL; + if (!p->gang_size) { + ret = -EINVAL; + goto free_partial_kdata; + } for (i = 0; i < p->gang_size; ++i) { ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); From 0d502ef8898b3983eef9e40f50dfe100a0de5d93 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Mon, 24 Oct 2022 15:36:16 +0800 Subject: [PATCH 285/963] drm/amd/display: Fix access timeout to DPIA AUX at boot time [Why] Since introduction of patch "Query DPIA HPD status.", link detection at boot could be accessing DPIA AUX, which will not succeed until DMUB outbox messaging is enabled and results in below dmesg logs: [ 160.840227] [drm:amdgpu_dm_process_dmub_aux_transfer_sync [amdgpu]] *ERROR* wait_for_completion_timeout timeout! [How] Enable DMUB outbox messaging before link detection at boot time. Reviewed-by: Wayne Lin Acked-by: Tom Chung Signed-off-by: Stylon Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 509739d83b5a..5a2faa892748 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1637,12 +1637,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } } - if (amdgpu_dm_initialize_drm_device(adev)) { - DRM_ERROR( - "amdgpu: failed to initialize sw for display support.\n"); - goto error; - } - /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive. * It is expected that DMUB will resend any pending notifications at this point, for * example HPD from DPIA. @@ -1650,6 +1644,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (dc_is_dmub_outbox_supported(adev->dm.dc)) dc_enable_dmub_outbox(adev->dm.dc); + if (amdgpu_dm_initialize_drm_device(adev)) { + DRM_ERROR( + "amdgpu: failed to initialize sw for display support.\n"); + goto error; + } + /* create fake encoders for MST */ dm_dp_create_fake_mst_encoders(adev); From 7af87fc1ba136143314c870059b8f60180247cbd Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 31 Oct 2022 14:58:12 -0400 Subject: [PATCH 286/963] drm/amd/display: Add HUBP surface flip interrupt handler On IGT, there is a test named amd_hotplug, and when the subtest basic is executed on DCN31, we get the following error: [drm] *ERROR* [CRTC:71:crtc-0] flip_done timed out [drm] *ERROR* flip_done timed out [drm] *ERROR* [CRTC:71:crtc-0] commit wait timed out [drm] *ERROR* flip_done timed out [drm] *ERROR* [CONNECTOR:88:DP-1] commit wait timed out [drm] *ERROR* flip_done timed out [drm] *ERROR* [PLANE:59:plane-3] commit wait timed out After enable the page flip log with the below command: echo -n 'format "[PFLIP]" +p' > /sys/kernel/debug/dynamic_debug/control It is possible to see that the flip was submitted, but DC never replied back, which generates time-out issues. This is an indication that the HUBP surface flip is missing. This commit fixes this issue by adding hubp1_set_flip_int to DCN31. Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Rodrigo Siqueira Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 84e1486f3d51..39a57bcd7866 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -87,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_init = hubp3_init, .set_unbounded_requesting = hubp31_set_unbounded_requesting, .hubp_soft_reset = hubp31_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, }; From 2c2911e09f19eac85df83b3201b38e69b8117059 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Sat, 12 Nov 2022 16:24:20 +0800 Subject: [PATCH 287/963] drm/amdgpu: Add psp_13_0_10_ta firmware to modinfo TA firmware loaded on psp v13_0_10, but it is missing in modinfo. Signed-off-by: Candice Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 21d822b1d589..88f9b327183a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -45,6 +45,7 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 From 8d8494c3467d366eb0f7c8198dab80be8bdc47d2 Mon Sep 17 00:00:00 2001 From: Stylon Wang Date: Wed, 26 Oct 2022 21:00:40 +0800 Subject: [PATCH 288/963] drm/amd/display: Fix invalid DPIA AUX reply causing system hang [Why] Some DPIA AUX replies have incorrect data length from original request. This could lead to overwriting of destination buffer if reply length is larger, which could cause invalid access to stack since many destination buffers are declared as local variables. [How] Check for invalid length from DPIA AUX replies and trigger a retry if reply length is not the same as original request. A DRM_WARN() dmesg log is also produced. Reviewed-by: Roman Li Acked-by: Tom Chung Signed-off-by: Stylon Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 20 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 6 ------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5a2faa892748..3e1ecca72430 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -147,6 +147,14 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU); /* Number of bytes in PSP footer for firmware. */ #define PSP_FOOTER_BYTES 0x100 +/* + * DMUB Async to Sync Mechanism Status + */ +#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 +#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 +#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 +#define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 + /** * DOC: overview * @@ -10109,6 +10117,8 @@ static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, *operation_result = AUX_RET_ERROR_TIMEOUT; } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; + } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { + *operation_result = AUX_RET_ERROR_INVALID_REPLY; } else { *operation_result = AUX_RET_ERROR_UNKNOWN; } @@ -10156,6 +10166,16 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; if (!payload->write && adev->dm.dmub_notify->aux_reply.length && payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { + + if (payload->length != adev->dm.dmub_notify->aux_reply.length) { + DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", + payload->address, payload->length, + adev->dm.dmub_notify->aux_reply.length); + return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, + DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, + (uint32_t *)operation_result); + } + memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, adev->dm.dmub_notify->aux_reply.length); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index b5ce15c43bcc..635c398fcefe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -50,12 +50,6 @@ #define AMDGPU_DMUB_NOTIFICATION_MAX 5 -/* - * DMUB Async to Sync Mechanism Status - */ -#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 -#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 -#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 /* #include "include/amdgpu_dal_power_if.h" #include "amdgpu_dm_irq.h" From 338ed913f3e43db71d71343ee204a9b6159279b3 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Tue, 15 Nov 2022 00:44:26 +0000 Subject: [PATCH 289/963] ANDROID: GKI: Convert NFC support as GKI module CONFIG_NFC: NFC subsystem support Bug: 232431151 Test: TH Change-Id: Idc4d8c6656a8c4e36a695d9c9eb18fe20793bc76 Signed-off-by: Ramji Jiyani (cherry picked from commit 48a868ee068a7fb0989a94ab244b1ce3d26cffba) --- BUILD.bazel | 1 + android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 932001e19545..74e037fb2e2a 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -35,6 +35,7 @@ _common_gki_modules_list = [ "net/can/can-gw.ko", "net/can/can-raw.ko", "net/mac80211/mac80211.ko", + "net/nfc/nfc.ko", "net/tipc/diag.ko", "net/tipc/tipc.ko", "net/wireless/cfg80211.ko", diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index f6518233d9ff..88bb374a0eb8 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -11,6 +11,7 @@ net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko net/mac80211/mac80211.ko +net/nfc/nfc.ko net/tipc/diag.ko net/tipc/tipc.ko net/wireless/cfg80211.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index f74d71ee0f22..4555c953b374 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -272,7 +272,7 @@ CONFIG_NL80211_TESTMODE=y # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_MAC80211=m CONFIG_RFKILL=y -CONFIG_NFC=y +CONFIG_NFC=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCIEAER=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 481110e55540..e264f2885a26 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -262,7 +262,7 @@ CONFIG_NL80211_TESTMODE=y # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_MAC80211=m CONFIG_RFKILL=y -CONFIG_NFC=y +CONFIG_NFC=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCIEAER=y From 2632daebafd04746b4b96c2f26a6021bc38f6209 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2022 12:44:01 +0100 Subject: [PATCH 290/963] x86/cpu: Restore AMD's DE_CFG MSR after resume DE_CFG contains the LFENCE serializing bit, restore it on resume too. This is relevant to older families due to the way how they do S3. Unify and correct naming while at it. Fixes: e4d0e84e4907 ("x86/cpu/AMD: Make LFENCE a serializing instruction") Reported-by: Andrew Cooper Reported-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: Signed-off-by: Linus Torvalds --- arch/x86/include/asm/msr-index.h | 8 +++++--- arch/x86/kernel/cpu/amd.c | 6 ++---- arch/x86/kernel/cpu/hygon.c | 4 ++-- arch/x86/kvm/svm/svm.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- arch/x86/power/cpu.c | 1 + tools/arch/x86/include/asm/msr-index.h | 8 +++++--- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 10ac52705892..4a2af82553e4 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 860b60273df3..c75d75b9f11a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -770,8 +770,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -965,8 +963,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 21fd425088fe..c393b8773ace 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -326,8 +326,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9f88c8e6766e..4b6d2b050e57 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2709,9 +2709,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2812,7 +2812,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -3041,7 +3041,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecea83f0da49..490ec23c8450 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1557,7 +1557,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index bb176c72891c..4cd39f304e20 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -519,6 +519,7 @@ static void pm_save_spec_msr(void) MSR_TSX_FORCE_ABORT, MSR_IA32_MCU_OPT_CTRL, MSR_AMD64_LS_CFG, + MSR_AMD64_DE_CFG, }; msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 10ac52705892..f17ade084720 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -535,6 +535,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -640,9 +645,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a From 7e952a18eb978a3e51fc1704b752378be66226b2 Mon Sep 17 00:00:00 2001 From: George Shen Date: Wed, 2 Nov 2022 15:06:48 -0400 Subject: [PATCH 291/963] drm/amd/display: Support parsing VRAM info v3.0 from VBIOS [Why] For DCN3.2 and DCN3.21, VBIOS has switch to using v3.0 of the VRAM info struct. We should read and override the VRAM info in driver with values provided by VBIOS to support memory downbin cases. Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../drm/amd/display/dc/bios/bios_parser2.c | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index ee0456b5e14e..e0c8d6f09bb4 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -2393,6 +2393,26 @@ static enum bp_result get_vram_info_v25( return result; } +static enum bp_result get_vram_info_v30( + struct bios_parser *bp, + struct dc_vram_info *info) +{ + struct atom_vram_info_header_v3_0 *info_v30; + enum bp_result result = BP_RESULT_OK; + + info_v30 = GET_IMAGE(struct atom_vram_info_header_v3_0, + DATA_TABLES(vram_info)); + + if (info_v30 == NULL) + return BP_RESULT_BADBIOSTABLE; + + info->num_chans = info_v30->channel_num; + info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8; + + return result; +} + + /* * get_integrated_info_v11 * @@ -3060,6 +3080,16 @@ static enum bp_result bios_parser_get_vram_info( } break; + case 3: + switch (revision.minor) { + case 0: + result = get_vram_info_v30(bp, info); + break; + default: + break; + } + break; + default: return result; } From 0a3e0fb8cfee4f45f1b1f3d4d028a4519c89d577 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 17 Oct 2022 15:31:57 +0000 Subject: [PATCH 292/963] Revert "drm: hide unregistered connectors from GETCONNECTOR IOCTL" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 981f09295687f856d5345e19c7084aca481c1395. It turns out this causes logically active but disconnected DP MST connectors to disappear from the KMS resources list, and Mutter then assumes the connector is already disabled. Later on Mutter tries to re-use the same CRTC but fails since on the kernel side it's still tied to the disconnected DP MST connector. Signed-off-by: Simon Ser Tested-by: Jonas Ådahl Reviewed-by: Lyude Paul Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20221017153150.60675-1-contact@emersion.fr --- drivers/gpu/drm/drm_mode_config.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c index 939d621c9ad4..688c8afe0bf1 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -151,9 +151,6 @@ int drm_mode_getresources(struct drm_device *dev, void *data, count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->registration_state != DRM_CONNECTOR_REGISTERED) - continue; - /* only expose writeback connectors if userspace understands them */ if (!file_priv->writeback_connectors && (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)) From 4d285241230676ba8b888701b89684b4e0360fcc Mon Sep 17 00:00:00 2001 From: George Shen Date: Tue, 1 Nov 2022 23:03:03 -0400 Subject: [PATCH 293/963] drm/amd/display: Fix calculation for cursor CAB allocation [Why] The cursor size (in memory) is currently incorrectly calculated, resulting not enough CAB being allocated for static screen cursor in MALL refresh. This results in cursor image corruption. [How] Use cursor pitch instead of cursor width when calculating cursor size. Update num cache lines calculation to use the result of the cursor size calculation instead of manually recalculating again. Reviewed-by: Alvin Lee Acked-by: Tom Chung Signed-off-by: George Shen Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index cf5bd9713f54..ac41a763bb1d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -283,8 +283,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c using the max for calculation */ if (hubp->curs_attr.width > 0) { - // Round cursor width to next multiple of 64 - cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height; + cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height; switch (pipe->stream->cursor_attributes.color_format) { case CURSOR_MODE_MONO: @@ -309,9 +308,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c cursor_size > 16384) { /* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1) */ - cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp + - DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) * - DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2; + cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / + DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) / + dc->caps.cache_line_size + 2; } break; } @@ -727,10 +726,7 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context) struct hubp *hubp = pipe->plane_res.hubp; if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) { - //Round cursor width up to next multiple of 64 - int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64; - int cursor_height = hubp->curs_attr.height; - int cursor_size = cursor_width * cursor_height; + int cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height; switch (hubp->curs_attr.color_format) { case CURSOR_MODE_MONO: From e7e4f77c991c9abf90924929a9d55f90b0bb78de Mon Sep 17 00:00:00 2001 From: Roman Li Date: Thu, 3 Nov 2022 17:33:07 -0400 Subject: [PATCH 294/963] drm/amd/display: Fix optc2_configure warning on dcn314 [Why] dcn314 uses optc2_configure_crc() that wraps optc1_configure_crc() + set additional registers not applicable to dcn314. It's not critical but when used leads to warning like: WARNING: drivers/gpu/drm/amd/amdgpu/../display/dc/dc_helper.c Call Trace: generic_reg_set_ex+0x6d/0xe0 [amdgpu] optc2_configure_crc+0x60/0x80 [amdgpu] dc_stream_configure_crc+0x129/0x150 [amdgpu] amdgpu_dm_crtc_configure_crc_source+0x5d/0xe0 [amdgpu] [How] Use optc1_configure_crc() directly Reviewed-by: Nicholas Kazlauskas Acked-by: Tom Chung Signed-off-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c index 47eb162f1a75..7dd36e402bac 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c @@ -237,7 +237,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = { .clear_optc_underflow = optc1_clear_optc_underflow, .setup_global_swap_lock = NULL, .get_crc = optc1_get_crc, - .configure_crc = optc2_configure_crc, + .configure_crc = optc1_configure_crc, .set_dsc_config = optc3_set_dsc_config, .get_dsc_status = optc2_get_dsc_status, .set_dwb_source = NULL, From 246e667079e8d0fc85f842bceca8c5a3c5da5905 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 3 Nov 2022 18:33:38 -0400 Subject: [PATCH 295/963] drm/amd/display: Fix prefetch calculations for dcn32 [Description] Prefetch calculation loop was not exiting until utilizing all of vstartup if it failed once. Locals need to be reset on each iteration of the loop. Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 244fd15d24b4..9afd9ba23fb2 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -718,6 +718,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman do { MaxTotalRDBandwidth = 0; + DestinationLineTimesForPrefetchLessThan2 = false; + VRatioPrefetchMoreThanMax = false; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines); #endif From c149947b188c651b943c1d8ca1494d1a98a3e27f Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 3 Nov 2022 18:38:13 -0400 Subject: [PATCH 296/963] drm/amd/display: use uclk pstate latency for fw assisted mclk validation dcn32 [WHY?] DCN32 uses fclk pstate watermarks for dummy pstate, and must always be supported. [HOW?] Validation needs to be run with fclk pstate latency set as the dummy pstate latency to get correct prefetch and bandwidth outputs. Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 659323ebd79d..12e17bcfca3f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1803,6 +1803,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, */ context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; + /* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so + * prefetch is scheduled correctly to account for dummy pstate. + */ + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false); maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb; dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; @@ -1990,6 +1996,10 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod; + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us; + dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); if (!pstate_en) @@ -1997,8 +2007,12 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us; - if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) + if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) { dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context); + if (dummy_latency_index == 0) + context->bw_ctx.dml.soc.fclk_change_latency_us = + dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us; + } } static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts, From f8d7edb0cda6ea1cba89d6f8aac74613e9cdc075 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 3 Nov 2022 18:40:01 -0400 Subject: [PATCH 297/963] drm/amd/display: Set max for prefetch lines on dcn32 [WHY?] Max number of lines that can be used for prefetch due to type constraints is 63.75. [HOW?] Enforce maximum prefetch lines as 63.75. Reviewed-by: Jun Lei Acked-by: Tom Chung Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h | 2 ++ .../amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h index f82e14cd9d8a..c8b28c83ddf4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h @@ -46,6 +46,8 @@ // Prefetch schedule max vratio #define __DML_MAX_VRATIO_PRE__ 4.0 +#define __DML_VBA_MAX_DST_Y_PRE__ 63.75 + #define BPP_INVALID 0 #define BPP_BLENDED_PIPE 0xffffffff diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 635fc54338fa..debe46b24a3e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -3475,7 +3475,6 @@ bool dml32_CalculatePrefetchSchedule( double min_Lsw; double Tsw_est1 = 0; double Tsw_est3 = 0; - double TPreMargin = 0; if (v->GPUVMEnable == true && v->HostVMEnable == true) HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; @@ -3669,6 +3668,7 @@ bool dml32_CalculatePrefetchSchedule( dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); + dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); @@ -3701,8 +3701,6 @@ bool dml32_CalculatePrefetchSchedule( dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; - - TPreMargin = Tpre_rounded - TPreReq; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); @@ -3730,7 +3728,8 @@ bool dml32_CalculatePrefetchSchedule( *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; - if (dst_y_prefetch_equ > 1 && TPreMargin > 0.0) { + if (dst_y_prefetch_equ > 1 && + (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; From 0e444a4de6b38c4593a07e4cfb5bf54c40cc79b6 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Thu, 3 Nov 2022 17:45:00 -0100 Subject: [PATCH 298/963] drm/amd/display: don't enable DRM CRTC degamma property for DCE DM maps DRM CRTC degamma to DPP (pre-blending) degamma block, but DCE doesn't support programmable degamma curve anywhere. Currently, a custom degamma is accepted by DM but just ignored by DCE driver and degamma correction isn't actually applied. There is no way to map custom degamma in DCE, therefore, DRM CRTC degamma property shouldn't be enabled for DCE drivers. Reviewed-by: Rodrigo Siqueira Signed-off-by: Melissa Wen Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 594fe8a4d02b..64dd02970292 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -412,7 +412,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, { struct amdgpu_crtc *acrtc = NULL; struct drm_plane *cursor_plane; - + bool is_dcn; int res = -ENOMEM; cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL); @@ -450,8 +450,14 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, acrtc->otg_inst = -1; dm->adev->mode_info.crtcs[crtc_index] = acrtc; - drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, + + /* Don't enable DRM CRTC degamma property for DCE since it doesn't + * support programmable degamma anywhere. + */ + is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; + drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); + drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); return 0; From 192039f12233c9063d040266e7c98188c7c89dec Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 11 Nov 2022 16:54:18 +0800 Subject: [PATCH 299/963] drm/amdgpu: disable BACO support on more cards Otherwise, some unexpected PCIE AER errors will be observed in runtime suspend/resume cycle. Signed-off-by: Guchun Chen Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 6212fd270857..697e98a0a20a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -379,6 +379,10 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)) || ((adev->pdev->device == 0x7422) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73A3) && + (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73E3) && (adev->pdev->revision == 0x00))) smu_baco->platform_support = false; From 6f9eea4392a178af19360694b1db64f985d0b459 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 14 Nov 2022 11:43:48 -0500 Subject: [PATCH 300/963] drm/amdkfd: Fix a memory limit issue It is to resolve a regression, which fails to allocate VRAM due to no free memory in application, the reason is we add check of vram_pin_size for memory limit, and application is pinning the memory for Peerdirect, KFD should not count it in memory limit. So removing vram_pin_size will resolve it. Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 84f44f7e4111..6d1ff7b9258d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -171,9 +171,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - - atomic64_read(&adev->vram_pin_size) - - reserved_for_pt)) { + adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; goto release; } From f8794f31abf33a3b22c72002783670a95e6efc51 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Nov 2022 12:50:38 -0500 Subject: [PATCH 301/963] drm/amdgpu: there is no vbios fb on devices with no display hw (v2) If we enable virtual display functionality on parts with no display hardware we can end up trying to check for and reserve the vbios FB area on devices where it doesn't exist. Check if display hardware is actually present on the hardware before trying to reserve the memory. v2: move the check into common code Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 41 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 2 +- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8639a4f9c6e8..2eca58220550 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1293,6 +1293,7 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v); struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang); +bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 64510898eedd..f1e9663b4051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6044,3 +6044,44 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, dma_fence_put(old); return NULL; } + +bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#endif + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_CARRIZO: + case CHIP_STONEY: + /* chips with display hardware */ + return true; + default: + /* IP discovery */ + if (!adev->ip_versions[DCE_HWIP][0] || + (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) + return false; + return true; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 34233a74248c..b06cb0bb166c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -656,7 +656,7 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) } if (amdgpu_sriov_vf(adev) || - !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) { + !amdgpu_device_has_display_hardware(adev)) { size = 0; } else { size = amdgpu_gmc_get_vbios_fb_size(adev); From 8652da45d09abe1b3174dbb80dc5176b8c3fa08e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Nov 2022 15:44:46 +0800 Subject: [PATCH 302/963] drm/amd/pm: enable runpm support over BACO for SMU13.0.0 Enable SMU13.0.0 runpm support. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 8 +++++ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h | 10 +------ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 11 ++----- .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 +- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 9 ++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 30 +++++++++++++++++-- 6 files changed, 50 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index e2fa3b066b96..f816b1dd110e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1388,6 +1388,14 @@ enum smu_cmn2asic_mapping_type { CMN2ASIC_MAPPING_WORKLOAD, }; +enum smu_baco_seq { + BACO_SEQ_BACO = 0, + BACO_SEQ_MSR, + BACO_SEQ_BAMACO, + BACO_SEQ_ULPS, + BACO_SEQ_COUNT, +}; + #define MSG_MAP(msg, index, valid_in_vf) \ [SMU_MSG_##msg] = {1, (index), (valid_in_vf)} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h index a9215494dcdd..d466db6f0ad4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h @@ -147,14 +147,6 @@ struct smu_11_5_power_context { uint32_t max_fast_ppt_limit; }; -enum smu_v11_0_baco_seq { - BACO_SEQ_BACO = 0, - BACO_SEQ_MSR, - BACO_SEQ_BAMACO, - BACO_SEQ_ULPS, - BACO_SEQ_COUNT, -}; - #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) int smu_v11_0_init_microcode(struct smu_context *smu); @@ -257,7 +249,7 @@ int smu_v11_0_baco_enter(struct smu_context *smu); int smu_v11_0_baco_exit(struct smu_context *smu); int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_v11_0_baco_seq baco_seq); + enum smu_baco_seq baco_seq); int smu_v11_0_mode1_reset(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index 80fb583b18d9..b7f4569aff2a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -124,14 +124,6 @@ struct smu_13_0_power_context { enum smu_13_0_power_state power_state; }; -enum smu_v13_0_baco_seq { - BACO_SEQ_BACO = 0, - BACO_SEQ_MSR, - BACO_SEQ_BAMACO, - BACO_SEQ_ULPS, - BACO_SEQ_COUNT, -}; - #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3) int smu_v13_0_init_microcode(struct smu_context *smu); @@ -218,6 +210,9 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu); int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); +int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, + enum smu_baco_seq baco_seq); + bool smu_v13_0_baco_is_support(struct smu_context *smu); enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index dccbd9f70723..70b560737687 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1576,7 +1576,7 @@ int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu) } int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu, - enum smu_v11_0_baco_seq baco_seq) + enum smu_baco_seq baco_seq) { return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq, NULL); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 43fb102a65f5..89f0f6eb19f3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2230,6 +2230,15 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu, return ret; } +int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu, + enum smu_baco_seq baco_seq) +{ + return smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_ArmD3, + baco_seq, + NULL); +} + bool smu_v13_0_baco_is_support(struct smu_context *smu) { struct smu_baco_context *smu_baco = &smu->smu_baco; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 29529328152d..f0121d171630 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -120,6 +120,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(Mode1Reset, PPSMC_MSG_Mode1Reset, 0), MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -1566,6 +1567,31 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu, NULL); } +static int smu_v13_0_0_baco_enter(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) + return smu_v13_0_baco_set_armd3_sequence(smu, + smu_baco->maco_support ? BACO_SEQ_BAMACO : BACO_SEQ_BACO); + else + return smu_v13_0_baco_enter(smu); +} + +static int smu_v13_0_0_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + return smu_v13_0_baco_exit(smu); + } +} + static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1827,8 +1853,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_baco_enter, - .baco_exit = smu_v13_0_baco_exit, + .baco_enter = smu_v13_0_0_baco_enter, + .baco_exit = smu_v13_0_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, From df7c013efc1a0da8861099802b2d6ab2aacaeb1b Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Nov 2022 16:07:39 +0800 Subject: [PATCH 303/963] drm/amd/pm: enable runpm support over BACO for SMU13.0.7 Enable SMU13.0.7 runpm support. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index c4102cfb734c..d74debc584f8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -122,6 +122,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), + MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -1578,6 +1579,31 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu, return ret; } +static int smu_v13_0_7_baco_enter(struct smu_context *smu) +{ + struct smu_baco_context *smu_baco = &smu->smu_baco; + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) + return smu_v13_0_baco_set_armd3_sequence(smu, + smu_baco->maco_support ? BACO_SEQ_BAMACO : BACO_SEQ_BACO); + else + return smu_v13_0_baco_enter(smu); +} + +static int smu_v13_0_7_baco_exit(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) { + /* Wait for PMFW handling for the Dstate change */ + usleep_range(10000, 11000); + return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS); + } else { + return smu_v13_0_baco_exit(smu); + } +} + static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1655,8 +1681,8 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .baco_is_support = smu_v13_0_baco_is_support, .baco_get_state = smu_v13_0_baco_get_state, .baco_set_state = smu_v13_0_baco_set_state, - .baco_enter = smu_v13_0_baco_enter, - .baco_exit = smu_v13_0_baco_exit, + .baco_enter = smu_v13_0_7_baco_enter, + .baco_exit = smu_v13_0_7_baco_exit, .mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported, .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, From 4b14841c9a820e484bc8c4c3f5a6fed1bc528cbc Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Nov 2022 15:52:20 +0800 Subject: [PATCH 304/963] drm/amd/pm: fix SMU13 runpm hang due to unintentional workaround The workaround designed for some specific ASICs is wrongly applied to SMU13 ASICs. That leads to some runpm hang. Signed-off-by: Evan Quan Reviewed-by: Hawking Zhang Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 4fe75dd2b329..b880f4d7d67e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1156,22 +1156,21 @@ static int smu_smc_hw_setup(struct smu_context *smu) uint64_t features_supported; int ret = 0; - if (adev->in_suspend && smu_is_dpm_running(smu)) { - dev_info(adev->dev, "dpm has been enabled\n"); - /* this is needed specifically */ - switch (adev->ip_versions[MP1_HWIP][0]) { - case IP_VERSION(11, 0, 7): - case IP_VERSION(11, 0, 11): - case IP_VERSION(11, 5, 0): - case IP_VERSION(11, 0, 12): + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + case IP_VERSION(11, 0, 11): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 0, 12): + if (adev->in_suspend && smu_is_dpm_running(smu)) { + dev_info(adev->dev, "dpm has been enabled\n"); ret = smu_system_features_control(smu, true); if (ret) dev_err(adev->dev, "Failed system features control!\n"); - break; - default: - break; + return ret; } - return ret; + break; + default: + break; } ret = smu_init_display_count(smu, 0); From d520de6cb42e88a1d008b54f935caf9fc05951da Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Tue, 15 Nov 2022 17:27:01 +0300 Subject: [PATCH 305/963] cifs: add check for returning value of SMB2_close_init If the returning value of SMB2_close_init is an error-value, exit the function. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 352d96f3acc6 ("cifs: multichannel: move channel selection above transport layer") Signed-off-by: Anastasia Belova Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 880cd494afea..9737296c0fbc 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1126,6 +1126,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rqst[2].rq_nvec = 1; rc = SMB2_close_init(tcon, server, &rqst[2], COMPOUND_FID, COMPOUND_FID, false); + if (rc) + goto sea_exit; smb2_set_related(&rqst[2]); rc = compound_send_recv(xid, ses, server, From 5954acbacbd1946b96ce8ee799d309cb0cd3cb9d Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Thu, 6 Oct 2022 11:33:14 +0200 Subject: [PATCH 306/963] drm/display: Don't assume dual mode adaptors support i2c sub-addressing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current dual mode adaptor ("DP++") detection code assumes that all adaptors support i2c sub-addressing for read operations from the DP-HDMI adaptor ID buffer. It has been observed that multiple adaptors do not in fact support this, and always return data starting at register 0. On affected adaptors, the code fails to read the proper registers that would identify the device as a type 2 adaptor, and handles those as type 1, limiting the TMDS clock to 165MHz, even if the according register would announce a higher TMDS clock. Fix this by always reading the ID buffer starting from offset 0, and discarding any bytes before the actual offset of interest. We tried finding authoritative documentation on whether or not this is allowed behaviour, but since all the official VESA docs are paywalled, the best we could come up with was the spec sheet for Texas Instruments' SNx5DP149 chip family.[1] It explicitly mentions that sub-addressing is supported for register writes, but *not* for reads (See NOTE in section 8.5.3). Unless TI openly decided to violate the VESA spec, one could take that as a hint that sub-addressing is in fact not mandated by VESA. The other two adaptors affected used the PS8409(A) and the LT8611, according to the data returned from their ID buffers. [1] https://www.ti.com/lit/ds/symlink/sn75dp149.pdf Cc: stable@vger.kernel.org Signed-off-by: Simon Rettberg Reviewed-by: Rafael Gieschke Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221006113314.41101987@computer Acked-by: Jani Nikula --- .../gpu/drm/display/drm_dp_dual_mode_helper.c | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c index 3ea53bb67d3b..bd61e20770a5 100644 --- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c @@ -63,23 +63,45 @@ ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter, u8 offset, void *buffer, size_t size) { + u8 zero = 0; + char *tmpbuf = NULL; + /* + * As sub-addressing is not supported by all adaptors, + * always explicitly read from the start and discard + * any bytes that come before the requested offset. + * This way, no matter whether the adaptor supports it + * or not, we'll end up reading the proper data. + */ struct i2c_msg msgs[] = { { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = 0, .len = 1, - .buf = &offset, + .buf = &zero, }, { .addr = DP_DUAL_MODE_SLAVE_ADDRESS, .flags = I2C_M_RD, - .len = size, + .len = size + offset, .buf = buffer, }, }; int ret; + if (offset) { + tmpbuf = kmalloc(size + offset, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + + msgs[1].buf = tmpbuf; + } + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (tmpbuf) + memcpy(buffer, tmpbuf + offset, size); + + kfree(tmpbuf); + if (ret < 0) return ret; if (ret != ARRAY_SIZE(msgs)) @@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, if (ret) return DRM_DP_DUAL_MODE_UNKNOWN; - /* - * Sigh. Some (maybe all?) type 1 adaptors are broken and ack - * the offset but ignore it, and instead they just always return - * data from the start of the HDMI ID buffer. So for a broken - * type 1 HDMI adaptor a single byte read will always give us - * 0x44, and for a type 1 DVI adaptor it should give 0x00 - * (assuming it implements any registers). Fortunately neither - * of those values will match the type 2 signature of the - * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with - * the type 2 adaptor detection safely even in the presence - * of broken type 1 adaptors. - */ ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID, &adaptor_id, sizeof(adaptor_id)); drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret); @@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev, return DRM_DP_DUAL_MODE_TYPE2_DVI; } /* - * If neither a proper type 1 ID nor a broken type 1 adaptor - * as described above, assume type 1, but let the user know - * that we may have misdetected the type. + * If not a proper type 1 ID, still assume type 1, but let + * the user know that we may have misdetected the type. */ - if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0]) + if (!is_type1_adaptor(adaptor_id)) drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id); } @@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output); * @enable: enable (as opposed to disable) the TMDS output buffers * * Set the state of the TMDS output buffers in the adaptor. For - * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As - * some type 1 adaptors have problems with registers (see comments - * in drm_dp_dual_mode_detect()) we avoid touching the register, - * making this function a no-op on type 1 adaptors. + * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. + * Type1 adaptors do not support any register writes. * * Returns: * 0 on success, negative error code on failure From 56afa3f145ff2d0571f152cee9dba4d5cc8aeeb3 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Tue, 15 Nov 2022 01:14:04 +0000 Subject: [PATCH 307/963] ANDROID: GKI: Convert Bluetooth RFCOMM as module CONFIG_BT_RFCOMM: RFCOMM protocol support RFCOMM provides connection oriented stream transport. RFCOMM support is required for Dialup Networking, OBEX and other Bluetooth applications. Bug: 232431151 Test: TH Change-Id: I573d8c7a1eff02b1554b5d6793b5c3c06fb35931 Signed-off-by: Ramji Jiyani (cherry picked from commit 264b55ef9b11bdc59c6164de01e3daf6a44bc95d) --- BUILD.bazel | 1 + android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 74e037fb2e2a..701972a2c251 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -31,6 +31,7 @@ _common_gki_modules_list = [ "drivers/usb/serial/usbserial.ko", "mm/zsmalloc.ko", "net/8021q/8021q.ko", + "net/bluetooth/rfcomm/rfcomm.ko", "net/can/can-bcm.ko", "net/can/can-gw.ko", "net/can/can-raw.ko", diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 88bb374a0eb8..95a80cdc12f5 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -7,6 +7,7 @@ drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko mm/zsmalloc.ko net/8021q/8021q.ko +net/bluetooth/rfcomm/rfcomm.ko net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 4555c953b374..4398b5c5074d 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -258,7 +258,7 @@ CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m CONFIG_CAN_GW=m CONFIG_BT=y -CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=y CONFIG_BT_HCIBTSDIO=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index e264f2885a26..735d5fac238e 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -248,7 +248,7 @@ CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m CONFIG_CAN_GW=m CONFIG_BT=y -CONFIG_BT_RFCOMM=y +CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=y CONFIG_BT_HCIBTSDIO=y From d39c7fb8b05fe5bc609ca1f457e313d79f72a195 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Tue, 15 Nov 2022 19:33:44 +0000 Subject: [PATCH 308/963] ANDROID: GKI: Covnert Bluetooth HIDP as module CONFIG_BT_HIDP: HIDP protocol support HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human Interface Device Profile. Bug: 232431151 Test: TH Change-Id: Ibe9ed98370ecc9c9825c3a735070817d16254138 Signed-off-by: Ramji Jiyani (cherry picked from commit 4747c631a478866edd2cd6e0c98ce8febedf6ab1) --- BUILD.bazel | 1 + android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 701972a2c251..8dc5ba0e72a6 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -31,6 +31,7 @@ _common_gki_modules_list = [ "drivers/usb/serial/usbserial.ko", "mm/zsmalloc.ko", "net/8021q/8021q.ko", + "net/bluetooth/hidp/hidp.ko", "net/bluetooth/rfcomm/rfcomm.ko", "net/can/can-bcm.ko", "net/can/can-gw.ko", diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 95a80cdc12f5..c22a1d4f23a5 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -7,6 +7,7 @@ drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko mm/zsmalloc.ko net/8021q/8021q.ko +net/bluetooth/hidp/hidp.ko net/bluetooth/rfcomm/rfcomm.ko net/can/can-bcm.ko net/can/can-gw.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 4398b5c5074d..83ba885f292a 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -260,7 +260,7 @@ CONFIG_CAN_GW=m CONFIG_BT=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_HIDP=y +CONFIG_BT_HIDP=m CONFIG_BT_HCIBTSDIO=y CONFIG_BT_HCIUART=y CONFIG_BT_HCIUART_LL=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 735d5fac238e..ea7c25c52b7d 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -250,7 +250,7 @@ CONFIG_CAN_GW=m CONFIG_BT=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_HIDP=y +CONFIG_BT_HIDP=m CONFIG_BT_HCIBTSDIO=y CONFIG_BT_HCIUART=y CONFIG_BT_HCIUART_LL=y From d55c01c608c1a06e88a719ed8599bbb159eba130 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Tue, 15 Nov 2022 21:51:01 +0000 Subject: [PATCH 309/963] ANDROID: GKI: Convert BT HCI sdio & uart as modules Converts BT host controller sdio & uart interface drivers as modules: CONFIG_BT_HCIBTSDIO: HCI SDIO driver CONFIG_BT_HCIUART: HCI UART driver Converts HCI uart dependent protocol drivers as modules due to dependency on the CONFIG_BT_HCIUART. CONFIG_BT_HCIUART_QCA: Qualcomm Atheros protocol support CONFIG_BT_HCIUART_BCM: Broadcom protocol support Bug: 232431151 Test: TH Change-Id: I3a3a7ae03140774a78a56ac306b8aef1042b1ceb Signed-off-by: Ramji Jiyani (cherry picked from commit 78a20156d1fc07d4a5960a86935d4a3dd26d3fb1) --- BUILD.bazel | 4 ++++ android/gki_system_dlkm_modules | 4 ++++ arch/arm64/configs/gki_defconfig | 4 ++-- arch/x86/configs/gki_defconfig | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 8dc5ba0e72a6..9e9183b58793 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -23,6 +23,10 @@ load("//build/kernel/kleaf:kernel.bzl", "ddk_headers") _common_gki_modules_list = [ # keep sorted "drivers/block/zram/zram.ko", + "drivers/bluetooth/btbcm.ko", + "drivers/bluetooth/btqca.ko", + "drivers/bluetooth/btsdio.ko", + "drivers/bluetooth/hci_uart.ko", "drivers/net/can/dev/can-dev.ko", "drivers/net/can/slcan/slcan.ko", "drivers/net/can/vcan.ko", diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index c22a1d4f23a5..ecef27a870e2 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -1,4 +1,8 @@ drivers/block/zram/zram.ko +drivers/bluetooth/btbcm.ko +drivers/bluetooth/btqca.ko +drivers/bluetooth/btsdio.ko +drivers/bluetooth/hci_uart.ko drivers/net/can/dev/can-dev.ko drivers/net/can/slcan/slcan.ko drivers/net/can/vcan.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 83ba885f292a..797f6ae63705 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -261,8 +261,8 @@ CONFIG_BT=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=m -CONFIG_BT_HCIBTSDIO=y -CONFIG_BT_HCIUART=y +CONFIG_BT_HCIBTSDIO=m +CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_LL=y CONFIG_BT_HCIUART_BCM=y CONFIG_BT_HCIUART_QCA=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index ea7c25c52b7d..f6cfed0bf282 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -251,8 +251,8 @@ CONFIG_BT=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=m -CONFIG_BT_HCIBTSDIO=y -CONFIG_BT_HCIUART=y +CONFIG_BT_HCIBTSDIO=m +CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_LL=y CONFIG_BT_HCIUART_BCM=y CONFIG_BT_HCIUART_QCA=y From c9b895c6878bdb6789dc1d7af60fd10f4a9f1937 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Mon, 14 Nov 2022 17:55:49 +0800 Subject: [PATCH 310/963] net: ag71xx: call phylink_disconnect_phy if ag71xx_hw_enable() fail in ag71xx_open() If ag71xx_hw_enable() fails, call phylink_disconnect_phy() to clean up. And if phylink_of_phy_connect() fails, nothing needs to be done. Compile tested only. Fixes: 892e09153fa3 ("net: ag71xx: port to phylink") Signed-off-by: Liu Jian Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221114095549.40342-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/atheros/ag71xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index cc932b3cf873..4a1efe9b37d0 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1427,7 +1427,7 @@ static int ag71xx_open(struct net_device *ndev) if (ret) { netif_err(ag, link, ndev, "phylink_of_phy_connect filed with err: %i\n", ret); - goto err; + return ret; } max_frame_len = ag71xx_max_frame_len(ndev->mtu); @@ -1448,6 +1448,7 @@ static int ag71xx_open(struct net_device *ndev) err: ag71xx_rings_cleanup(ag); + phylink_disconnect_phy(ag->phylink); return ret; } From 2929cceb2fcf0ded7182562e4888afafece82cce Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:05:19 +0000 Subject: [PATCH 311/963] net/x25: Fix skb leak in x25_lapb_receive_frame() x25_lapb_receive_frame() using skb_copy() to get a private copy of skb, the new skb should be freed in the undersized/fragmented skb error handling path. Otherwise there is a memory leak. Fixes: cb101ed2c3c7 ("x25: Handle undersized/fragmented skbs") Signed-off-by: Wei Yongjun Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20221114110519.514538-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- net/x25/x25_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 5259ef8f5242..748d8630ab58 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -117,7 +117,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, if (!pskb_may_pull(skb, 1)) { x25_neigh_put(nb); - return 0; + goto drop; } switch (skb->data[0]) { From 4e0c19fcb8b5323716140fa82b79aa9f60e60407 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 14 Nov 2022 16:35:51 +0200 Subject: [PATCH 312/963] net: dsa: don't leak tagger-owned storage on switch driver unbind In the initial commit dc452a471dba ("net: dsa: introduce tagger-owned storage for private and shared data"), we had a call to tag_ops->disconnect(dst) issued from dsa_tree_free(), which is called at tree teardown time. There were problems with connecting to a switch tree as a whole, so this got reworked to connecting to individual switches within the tree. In this process, tag_ops->disconnect(ds) was made to be called only from switch.c (cross-chip notifiers emitted as a result of dynamic tag proto changes), but the normal driver teardown code path wasn't replaced with anything. Solve this problem by adding a function that does the opposite of dsa_switch_setup_tag_protocol(), which is called from the equivalent spot in dsa_switch_teardown(). The positioning here also ensures that we won't have any use-after-free in tagging protocol (*rcv) ops, since the teardown sequence is as follows: dsa_tree_teardown -> dsa_tree_teardown_master -> dsa_master_teardown -> unsets master->dsa_ptr, making no further packets match the ETH_P_XDSA packet type handler -> dsa_tree_teardown_ports -> dsa_port_teardown -> dsa_slave_destroy -> unregisters DSA net devices, there is even a synchronize_net() in unregister_netdevice_many() -> dsa_tree_teardown_switches -> dsa_switch_teardown -> dsa_switch_teardown_tag_protocol -> finally frees the tagger-owned storage Fixes: 7f2973149c22 ("net: dsa: make tagging protocols connect to individual switches from a tree") Signed-off-by: Vladimir Oltean Reviewed-by: Saeed Mahameed Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221114143551.1906361-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index e504a18fc125..5417f7b1187c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -864,6 +864,14 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds) return err; } +static void dsa_switch_teardown_tag_protocol(struct dsa_switch *ds) +{ + const struct dsa_device_ops *tag_ops = ds->dst->tag_ops; + + if (tag_ops->disconnect) + tag_ops->disconnect(ds); +} + static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; @@ -953,6 +961,8 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->slave_mii_bus = NULL; } + dsa_switch_teardown_tag_protocol(ds); + if (ds->ops->teardown) ds->ops->teardown(ds); From 869e1504e1afcb663e04e93954a771d44fa2a669 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Sat, 12 Nov 2022 09:07:15 +0100 Subject: [PATCH 313/963] MAINTAINERS: Add Nathan and Nicolas to Kbuild reviewers As suggested by Nick, add Nathan and myself to Kbuild reviewers to share more review responsibilities. Signed-off-by: Nicolas Schier Acked-by: Nick Desaulniers Acked-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 256f03904987..c81a68dfa2a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11093,7 +11093,9 @@ F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) M: Masahiro Yamada M: Michal Marek +R: Nathan Chancellor R: Nick Desaulniers +R: Nicolas Schier L: linux-kbuild@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git From 85c2d6d82774368d19b3febc4cfed5957a32e976 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Sat, 12 Nov 2022 09:07:16 +0100 Subject: [PATCH 314/963] MAINTAINERS: Remove Michal Marek from Kbuild maintainers Remove Michal Marek from Kbuild maintainers as there is no response from him since October 2017. Add an entry for Michal in CREDITS. Michal, thanks for maintaining Kbuild for almost eight years! Suggested-by: Nick Desaulniers Cc: Michal Marek Signed-off-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 54672cbcd719..198f675c419e 100644 --- a/CREDITS +++ b/CREDITS @@ -2452,6 +2452,10 @@ S: 482 Shadowgraph Dr. S: San Jose, CA 95110 S: USA +N: Michal Marek +E: michal.lkml@markovi.net +D: Kbuild Maintainer 2009-2017 + N: Martin Mares E: mj@ucw.cz W: http://www.ucw.cz/~mj/ diff --git a/MAINTAINERS b/MAINTAINERS index c81a68dfa2a7..8d34ef02334d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11092,7 +11092,6 @@ F: fs/autofs/ KERNEL BUILD + files below scripts/ (unless maintained elsewhere) M: Masahiro Yamada -M: Michal Marek R: Nathan Chancellor R: Nick Desaulniers R: Nicolas Schier From 26e01ee19b20695487ed6e19c75eb987642eeee9 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Sat, 12 Nov 2022 09:07:17 +0100 Subject: [PATCH 315/963] MAINTAINERS: Add linux-kbuild's patchwork Add patchwork URL for Kconfig and Kbuild. Signed-off-by: Nicolas Schier Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d34ef02334d..d34f2652fc62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11035,6 +11035,7 @@ KCONFIG M: Masahiro Yamada L: linux-kbuild@vger.kernel.org S: Maintained +Q: https://patchwork.kernel.org/project/linux-kbuild/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig F: Documentation/kbuild/kconfig* F: scripts/Kconfig.include @@ -11097,6 +11098,7 @@ R: Nick Desaulniers R: Nicolas Schier L: linux-kbuild@vger.kernel.org S: Maintained +Q: https://patchwork.kernel.org/project/linux-kbuild/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git F: Documentation/kbuild/ F: Makefile From 8d6e38f636ac063e8062a21e7616f7d9bf0df5d8 Mon Sep 17 00:00:00 2001 From: Tiago Dias Ferreira Date: Wed, 16 Nov 2022 00:17:56 -0300 Subject: [PATCH 316/963] nvme-pci: add NVME_QUIRK_BOGUS_NID for Netac NV7000 Added a quirk to fix the Netac NV7000 SSD reporting duplicate NGUIDs. Cc: Signed-off-by: Tiago Dias Ferreira Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bf708fd5adf6..f4335519399d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3521,6 +3521,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1f40, 0x5236), /* Netac Technologies Co. NV7000 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */ From 0a52566279b4ee65ecd2503d7b7342851f84755c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 14 Nov 2022 08:45:02 +0200 Subject: [PATCH 317/963] nvmet: fix a memory leak in nvmet_auth_set_key When changing dhchap secrets we need to release the old secrets as well. kmemleak complaint: -- unreferenced object 0xffff8c7f44ed8180 (size 64): comm "check", pid 7304, jiffies 4295686133 (age 72034.246s) hex dump (first 32 bytes): 44 48 48 43 2d 31 3a 30 30 3a 4c 64 4c 4f 64 71 DHHC-1:00:LdLOdq 79 56 69 67 77 48 55 32 6d 5a 59 4c 7a 35 59 38 yVigwHU2mZYLz5Y8 backtrace: [<00000000b6fc5071>] kstrdup+0x2e/0x60 [<00000000f0f4633f>] 0xffffffffc0e07ee6 [<0000000053006c05>] 0xffffffffc0dff783 [<00000000419ae922>] configfs_write_iter+0xb1/0x120 [<000000008183c424>] vfs_write+0x2be/0x3c0 [<000000009005a2a5>] ksys_write+0x5f/0xe0 [<00000000cd495c89>] do_syscall_64+0x38/0x90 [<00000000f2a84ac5>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/auth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index c4113b43dbfe..4dcddcf95279 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -45,9 +45,11 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, if (!dhchap_secret) return -ENOMEM; if (set_ctrl) { + kfree(host->dhchap_ctrl_secret); host->dhchap_ctrl_secret = strim(dhchap_secret); host->dhchap_ctrl_key_hash = key_hash; } else { + kfree(host->dhchap_secret); host->dhchap_secret = strim(dhchap_secret); host->dhchap_key_hash = key_hash; } From 92bbd67a55fee50743b42825d1c016e7fd5c79f9 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 15 Nov 2022 18:39:34 +0800 Subject: [PATCH 318/963] cifs: Fix wrong return value checking when GETFLAGS The return value of CIFSGetExtAttr is negative, should be checked with -EOPNOTSUPP rather than EOPNOTSUPP. Fixes: 64a5cfa6db94 ("Allow setting per-file compression via SMB2/3") Signed-off-by: Zhang Xiaoxu Signed-off-by: Steve French --- fs/cifs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 89d5fa887364..6419ec47c2a8 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -343,7 +343,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = put_user(ExtAttrBits & FS_FL_USER_VISIBLE, (int __user *)arg); - if (rc != EOPNOTSUPP) + if (rc != -EOPNOTSUPP) break; } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ @@ -373,7 +373,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) * pSMBFile->fid.netfid, * extAttrBits, * &ExtAttrMask); - * if (rc != EOPNOTSUPP) + * if (rc != -EOPNOTSUPP) * break; */ From 7dd12d65ac646046a3fe0bbf9a4e86f4514207b3 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 25 Oct 2022 13:39:31 +0900 Subject: [PATCH 319/963] zonefs: fix zone report size in __zonefs_io_error() When an IO error occurs, the function __zonefs_io_error() is used to issue a zone report to obtain the latest zone information from the device. This function gets a zone report for all zones used as storage for a file, which is always 1 zone except for files representing aggregated conventional zones. The number of zones of a zone report for a file is calculated in __zonefs_io_error() by doing a bit-shift of the inode i_zone_size field, which is equal to or larger than the device zone size. However, this calculation does not take into account that the last zone of a zoned device may be smaller than the zone size reported by bdev_zone_sectors() (which is used to set the bit shift size). As a result, if an error occurs for an IO targetting such last smaller zone, the zone report will ask for 0 zones, leading to an invalid zone report. Fix this by using the fact that all files require a 1 zone report, except if the inode i_zone_size field indicates a zone size larger than the device zone size. This exception case corresponds to a mount with aggregated conventional zones. A check for this exception is added to the file inode initialization during mount. If an invalid setup is detected, emit an error and fail the mount (check contributed by Johannes Thumshirn). Signed-off-by: Johannes Thumshirn Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 860f0b1032c6..abc9a85106f2 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -478,14 +478,22 @@ static void __zonefs_io_error(struct inode *inode, bool write) struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); unsigned int noio_flag; - unsigned int nr_zones = - zi->i_zone_size >> (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + unsigned int nr_zones = 1; struct zonefs_ioerr_data err = { .inode = inode, .write = write, }; int ret; + /* + * The only files that have more than one zone are conventional zone + * files with aggregated conventional zones, for which the inode zone + * size is always larger than the device zone size. + */ + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev)) + nr_zones = zi->i_zone_size >> + (sbi->s_zone_sectors_shift + SECTOR_SHIFT); + /* * Memory allocations in blkdev_report_zones() can trigger a memory * reclaim which may in turn cause a recursion into zonefs as well as @@ -1407,6 +1415,14 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_ztype = type; zi->i_zsector = zone->start; zi->i_zone_size = zone->len << SECTOR_SHIFT; + if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT && + !(sbi->s_features & ZONEFS_F_AGGRCNV)) { + zonefs_err(sb, + "zone size %llu doesn't match device's zone sectors %llu\n", + zi->i_zone_size, + bdev_zone_sectors(sb->s_bdev) << SECTOR_SHIFT); + return -EINVAL; + } zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->capacity << SECTOR_SHIFT); @@ -1456,11 +1472,11 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; - int ret; + int ret = -ENOMEM; dentry = d_alloc_name(parent, name); if (!dentry) - return NULL; + return ERR_PTR(ret); inode = new_inode(parent->d_sb); if (!inode) @@ -1485,7 +1501,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, dput: dput(dentry); - return NULL; + return ERR_PTR(ret); } struct zonefs_zone_data { @@ -1505,7 +1521,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, struct blk_zone *zone, *next, *end; const char *zgroup_name; char *file_name; - struct dentry *dir; + struct dentry *dir, *dent; unsigned int n = 0; int ret; @@ -1523,8 +1539,8 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, zgroup_name = "seq"; dir = zonefs_create_inode(sb->s_root, zgroup_name, NULL, type); - if (!dir) { - ret = -ENOMEM; + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); goto free; } @@ -1570,8 +1586,9 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, * Use the file number within its group as file name. */ snprintf(file_name, ZONEFS_NAME_MAX - 1, "%u", n); - if (!zonefs_create_inode(dir, file_name, zone, type)) { - ret = -ENOMEM; + dent = zonefs_create_inode(dir, file_name, zone, type); + if (IS_ERR(dent)) { + ret = PTR_ERR(dent); goto free; } From 61ba9e9712e187e019e6451bb9fc8eb24685fc50 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 31 Oct 2022 11:30:31 +0900 Subject: [PATCH 320/963] zonefs: Remove to_attr() helper function to_attr() in zonefs sysfs code is unused, which it causes a warning when compiling with clang and W=1. Delete it to prevent the warning. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn --- fs/zonefs/sysfs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index 9cb6755ce39a..9920689dc098 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -15,11 +15,6 @@ struct zonefs_sysfs_attr { ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf); }; -static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr) -{ - return container_of(attr, struct zonefs_sysfs_attr, attr); -} - #define ZONEFS_SYSFS_ATTR_RO(name) \ static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name) From 4a567d164d0e0c57e7b694b988db86361f130cb7 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Wed, 16 Nov 2022 00:14:40 +0100 Subject: [PATCH 321/963] platform/surface: aggregator_registry: Add support for Surface Laptop 5 Add device nodes to enable support for battery and charger status, the ACPI platform profile, as well as internal HID devices (including touchpad and keyboard) on the Surface Laptop 5. Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221115231440.1338142-1-luzmaximilian@gmail.com Signed-off-by: Hans de Goede --- .../surface/surface_aggregator_registry.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index db82c2a7c567..023f126121d7 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -234,6 +234,19 @@ static const struct software_node *ssam_node_group_sl3[] = { NULL, }; +/* Devices for Surface Laptop 5. */ +static const struct software_node *ssam_node_group_sl5[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_pprof, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Studio. */ static const struct software_node *ssam_node_group_sls[] = { &ssam_node_root, @@ -345,6 +358,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 4 (13", Intel) */ { "MSHW0250", (unsigned long)ssam_node_group_sl3 }, + /* Surface Laptop 5 */ + { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, From 8b9b6a044b408283b086702b1d9e3cf4ba45b426 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Nov 2022 15:38:41 +0800 Subject: [PATCH 322/963] platform/x86: hp-wmi: Ignore Smart Experience App event Sometimes hp-wmi driver complains on system resume: [ 483.116451] hp_wmi: Unknown event_id - 33 - 0x0 According to HP it's a feature called "HP Smart Experience App" and it's safe to be ignored. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20221114073842.205392-1-kai.heng.feng@canonical.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/hp-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 12449038bed1..0a99058be813 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -90,6 +90,7 @@ enum hp_wmi_event_ids { HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, HPWMI_SANITIZATION_MODE = 0x17, + HPWMI_SMART_EXPERIENCE_APP = 0x21, }; /* @@ -859,6 +860,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_SANITIZATION_MODE: break; + case HPWMI_SMART_EXPERIENCE_APP: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; From 81a5603a0f50fd7cf17ff21d106052215eaf2028 Mon Sep 17 00:00:00 2001 From: Arnav Rawat Date: Fri, 11 Nov 2022 14:32:09 +0000 Subject: [PATCH 323/963] platform/x86: ideapad-laptop: Fix interrupt storm on fn-lock toggle on some Yoga laptops Commit 3ae86d2d4704 ("platform/x86: ideapad-laptop: Fix Legion 5 Fn lock LED") uses the WMI event-id for the fn-lock event on some Legion 5 laptops to manually toggle the fn-lock LED because the EC does not do it itself. However, the same WMI ID is also sent on some Yoga laptops. Here, setting the fn-lock state is not valid behavior, and causes the EC to spam interrupts until the laptop is rebooted. Add a set_fn_lock_led_list[] DMI-id list and only enable the workaround to manually set the LED on models on this list. Link: https://bugzilla.kernel.org/show_bug.cgi?id=212671 Cc: Meng Dong Signed-off-by: Arnav Rawat Link: https://lore.kernel.org/r/12093851.O9o76ZdvQC@fedora [hdegoede@redhat.com: Check DMI-id list only once and store the result] Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 33b3dfdd1b08..6c460cdc05bb 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -136,6 +136,7 @@ struct ideapad_private { bool dytc : 1; bool fan_mode : 1; bool fn_lock : 1; + bool set_fn_lock_led : 1; bool hw_rfkill_switch : 1; bool kbd_bl : 1; bool touchpad_ctrl_via_ec : 1; @@ -1501,6 +1502,9 @@ static void ideapad_wmi_notify(u32 value, void *context) ideapad_input_report(priv, value); break; case 208: + if (!priv->features.set_fn_lock_led) + break; + if (!eval_hals(priv->adev->handle, &result)) { bool state = test_bit(HALS_FNLOCK_STATE_BIT, &result); @@ -1514,6 +1518,18 @@ static void ideapad_wmi_notify(u32 value, void *context) } #endif +/* On some models we need to call exec_sals(SALS_FNLOCK_ON/OFF) to set the LED */ +static const struct dmi_system_id set_fn_lock_led_list[] = { + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=212671 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), + } + }, + {} +}; + /* * Some ideapads have a hardware rfkill switch, but most do not have one. * Reading VPCCMD_R_RF always results in 0 on models without a hardware rfkill, @@ -1556,6 +1572,7 @@ static void ideapad_check_features(struct ideapad_private *priv) acpi_handle handle = priv->adev->handle; unsigned long val; + priv->features.set_fn_lock_led = dmi_check_system(set_fn_lock_led_list); priv->features.hw_rfkill_switch = dmi_check_system(hw_rfkill_list); /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ From b44fd994e45112b58b6c1dec4451d9a925784589 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 15 Nov 2022 20:34:00 +0100 Subject: [PATCH 324/963] platform/x86: ideapad-laptop: Add module parameters to match DMI quirk tables Add module parameters to allow setting the hw_rfkill_switch and set_fn_lock_led feature flags for testing these on laptops which are not on the DMI-id based allow lists for these 2 flags. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221115193400.376159-1-hdegoede@redhat.com --- drivers/platform/x86/ideapad-laptop.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 6c460cdc05bb..3ea8fc6a9ca3 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -155,7 +155,21 @@ MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth."); static bool allow_v4_dytc; module_param(allow_v4_dytc, bool, 0444); -MODULE_PARM_DESC(allow_v4_dytc, "Enable DYTC version 4 platform-profile support."); +MODULE_PARM_DESC(allow_v4_dytc, + "Enable DYTC version 4 platform-profile support. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + +static bool hw_rfkill_switch; +module_param(hw_rfkill_switch, bool, 0444); +MODULE_PARM_DESC(hw_rfkill_switch, + "Enable rfkill support for laptops with a hw on/off wifi switch/slider. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + +static bool set_fn_lock_led; +module_param(set_fn_lock_led, bool, 0444); +MODULE_PARM_DESC(set_fn_lock_led, + "Enable driver based updates of the fn-lock LED on fn-lock changes. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); /* * ACPI Helpers @@ -1572,8 +1586,10 @@ static void ideapad_check_features(struct ideapad_private *priv) acpi_handle handle = priv->adev->handle; unsigned long val; - priv->features.set_fn_lock_led = dmi_check_system(set_fn_lock_led_list); - priv->features.hw_rfkill_switch = dmi_check_system(hw_rfkill_list); + priv->features.set_fn_lock_led = + set_fn_lock_led || dmi_check_system(set_fn_lock_led_list); + priv->features.hw_rfkill_switch = + hw_rfkill_switch || dmi_check_system(hw_rfkill_list); /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ if (acpi_dev_present("ELAN0634", NULL, -1)) From ba86af3733aece88dbcee0dfebf7e2dcfefb2be4 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 21:38:52 +0800 Subject: [PATCH 325/963] net: lan966x: Fix potential null-ptr-deref in lan966x_stats_init() lan966x_stats_init() calls create_singlethread_workqueue() and not checked the ret value, which may return NULL. And a null-ptr-deref may happen: lan966x_stats_init() create_singlethread_workqueue() # failed, lan966x->stats_queue is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. Fixes: 12c2d0a5b8e2 ("net: lan966x: add ethtool configuration and statistics") Signed-off-by: Shang XiaoJing Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index fea42542be28..06811c60d598 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -716,6 +716,9 @@ int lan966x_stats_init(struct lan966x *lan966x) snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); + if (!lan966x->stats_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&lan966x->stats_work, lan966x_check_stats_work); queue_delayed_work(lan966x->stats_queue, &lan966x->stats_work, LAN966X_STATS_CHECK_DELAY); From 639f5d006e36bb303f525d9479448c412b720c39 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 21:38:53 +0800 Subject: [PATCH 326/963] net: microchip: sparx5: Fix potential null-ptr-deref in sparx_stats_init() and sparx5_start() sparx_stats_init() calls create_singlethread_workqueue() and not checked the ret value, which may return NULL. And a null-ptr-deref may happen: sparx_stats_init() create_singlethread_workqueue() # failed, sparx5->stats_queue is NULL queue_delayed_work() queue_delayed_work_on() __queue_delayed_work() # warning here, but continue __queue_work() # access wq->flags, null-ptr-deref Check the ret value and return -ENOMEM if it is NULL. So as sparx5_start(). Fixes: af4b11022e2d ("net: sparx5: add ethtool configuration and statistics support") Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Shang XiaoJing Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c | 3 +++ drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 6b0febcb7fa9..01f3a3a41cdb 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1253,6 +1253,9 @@ int sparx_stats_init(struct sparx5 *sparx5) snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(sparx5->dev)); sparx5->stats_queue = create_singlethread_workqueue(queue_name); + if (!sparx5->stats_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&sparx5->stats_work, sparx5_check_stats_work); queue_delayed_work(sparx5->stats_queue, &sparx5->stats_work, SPX5_STATS_CHECK_DELAY); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 62a325e96345..eeac04b84638 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -659,6 +659,9 @@ static int sparx5_start(struct sparx5 *sparx5) snprintf(queue_name, sizeof(queue_name), "%s-mact", dev_name(sparx5->dev)); sparx5->mact_queue = create_singlethread_workqueue(queue_name); + if (!sparx5->mact_queue) + return -ENOMEM; + INIT_DELAYED_WORK(&sparx5->mact_work, sparx5_mact_pull_work); queue_delayed_work(sparx5->mact_queue, &sparx5->mact_work, SPX5_MACT_PULL_DELAY); From baa014b9543c8e5e94f5d15b66abfe60750b8284 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 14 Nov 2022 10:10:29 +0530 Subject: [PATCH 327/963] perf/x86/amd: Fix crash due to race between amd_pmu_enable_all, perf NMI and throttling amd_pmu_enable_all() does: if (!test_bit(idx, cpuc->active_mask)) continue; amd_pmu_enable_event(cpuc->events[idx]); A perf NMI of another event can come between these two steps. Perf NMI handler internally disables and enables _all_ events, including the one which nmi-intercepted amd_pmu_enable_all() was in process of enabling. If that unintentionally enabled event has very low sampling period and causes immediate successive NMI, causing the event to be throttled, cpuc->events[idx] and cpuc->active_mask gets cleared by x86_pmu_stop(). This will result in amd_pmu_enable_event() getting called with event=NULL when amd_pmu_enable_all() resumes after handling the NMIs. This causes a kernel crash: BUG: kernel NULL pointer dereference, address: 0000000000000198 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page [...] Call Trace: amd_pmu_enable_all+0x68/0xb0 ctx_resched+0xd9/0x150 event_function+0xb8/0x130 ? hrtimer_start_range_ns+0x141/0x4a0 ? perf_duration_warn+0x30/0x30 remote_function+0x4d/0x60 __flush_smp_call_function_queue+0xc4/0x500 flush_smp_call_function_queue+0x11d/0x1b0 do_idle+0x18f/0x2d0 cpu_startup_entry+0x19/0x20 start_secondary+0x121/0x160 secondary_startup_64_no_verify+0xe5/0xeb amd_pmu_disable_all()/amd_pmu_enable_all() calls inside perf NMI handler were recently added as part of BRS enablement but I'm not sure whether we really need them. We can just disable BRS in the beginning and enable it back while returning from NMI. This will solve the issue by not enabling those events whose active_masks are set but are not yet enabled in hw pmu. Fixes: ada543459cab ("perf/x86/amd: Add AMD Fam19h Branch Sampling support") Reported-by: Linux Kernel Functional Testing Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221114044029.373-1-ravi.bangoria@amd.com --- arch/x86/events/amd/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 8b70237c33f7..d6f3703e4119 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -861,8 +861,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) pmu_enabled = cpuc->enabled; cpuc->enabled = 0; - /* stop everything (includes BRS) */ - amd_pmu_disable_all(); + amd_brs_disable_all(); /* Drain BRS is in use (could be inactive) */ if (cpuc->lbr_users) @@ -873,7 +872,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) cpuc->enabled = pmu_enabled; if (pmu_enabled) - amd_pmu_enable_all(0); + amd_brs_enable_all(); return amd_pmu_adjust_nmi_window(handled); } From ce0d998be9274dd3a3d971cbeaa6fe28fd2c3062 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Sat, 12 Nov 2022 17:15:08 +0200 Subject: [PATCH 328/963] perf/x86/intel/pt: Fix sampling using single range output Deal with errata TGL052, ADL037 and RPL017 "Trace May Contain Incorrect Data When Configured With Single Range Output Larger Than 4KB" by disabling single range output whenever larger than 4KB. Fixes: 670638477aed ("perf/x86/intel/pt: Opportunistically use single range output mode") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221112151508.13768-1-adrian.hunter@intel.com --- arch/x86/events/intel/pt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 82ef87e9a897..42a55794004a 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1263,6 +1263,15 @@ static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages) if (1 << order != nr_pages) goto out; + /* + * Some processors cannot always support single range for more than + * 4KB - refer errata TGL052, ADL037 and RPL017. Future processors might + * also be affected, so for now rather than trying to keep track of + * which ones, just disable it for all. + */ + if (nr_pages > 1) + goto out; + buf->single = true; buf->nr_pages = nr_pages; ret = 0; From 23df39fc6a36183af5e6e4f47523f1ad2cdc1d30 Mon Sep 17 00:00:00 2001 From: Guo Jin Date: Tue, 8 Nov 2022 14:01:26 +0800 Subject: [PATCH 329/963] locking: Fix qspinlock/x86 inline asm error When compiling linux 6.1.0-rc3 configured with CONFIG_64BIT=y and CONFIG_PARAVIRT_SPINLOCKS=y on x86_64 using LLVM 11.0, an error: " error: changed section flags for .spinlock.text, expected:: 0x6" occurred. The reason is the .spinlock.text in kernel/locking/qspinlock.o is used many times, but its flags are omitted in subsequent use. LLVM 11.0 assembler didn't permit to leave out flags in subsequent uses of the same sections. So this patch adds the corresponding flags to avoid above error. Fixes: 501f7f69bca1 ("locking: Add __lockfunc to slow path functions") Signed-off-by: Guo Jin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20221108060126.2505-1-guoj17@chinatelecom.cn --- arch/x86/include/asm/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 60ece592b220..dbb38a6b4dfb 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -37,7 +37,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .spinlock.text;" +asm (".pushsection .spinlock.text, \"ax\";" ".globl " PV_UNLOCK ";" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" From f524b7289bbb0c8ffaa2ba3c34c146e43da54fb2 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 14:22:25 +0000 Subject: [PATCH 330/963] net: thunderbolt: Fix error handling in tbnet_init() A problem about insmod thunderbolt-net failed is triggered with following log given while lsmod does not show thunderbolt_net: insmod: ERROR: could not insert module thunderbolt-net.ko: File exists The reason is that tbnet_init() returns tb_register_service_driver() directly without checking its return value, if tb_register_service_driver() failed, it returns without removing property directory, resulting the property directory can never be created later. tbnet_init() tb_register_property_dir() # register property directory tb_register_service_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without remove property directory Fix by remove property directory when tb_register_service_driver() returns error. Fixes: e69b6c02b4c3 ("net: Add support for networking over Thunderbolt cable") Signed-off-by: Yuan Can Acked-by: Mika Westerberg Signed-off-by: David S. Miller --- drivers/net/thunderbolt.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 83fcaeb2ac5e..a52ee2bf5575 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -1391,12 +1391,21 @@ static int __init tbnet_init(void) tb_property_add_immediate(tbnet_dir, "prtcstns", flags); ret = tb_register_property_dir("network", tbnet_dir); - if (ret) { - tb_property_free_dir(tbnet_dir); - return ret; - } + if (ret) + goto err_free_dir; - return tb_register_service_driver(&tbnet_driver); + ret = tb_register_service_driver(&tbnet_driver); + if (ret) + goto err_unregister; + + return 0; + +err_unregister: + tb_unregister_property_dir("network", tbnet_dir); +err_free_dir: + tb_property_free_dir(tbnet_dir); + + return ret; } module_init(tbnet_init); From 2d77de1581bb5b470486edaf17a7d70151131afd Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Tue, 27 Sep 2022 13:22:11 +0530 Subject: [PATCH 331/963] scripts/faddr2line: Fix regression in name resolution on ppc64le Commit 1d1a0e7c5100 ("scripts/faddr2line: Fix overlapping text section failures") can cause faddr2line to fail on ppc64le on some distributions, while it works fine on other distributions. The failure can be attributed to differences in the readelf output. $ ./scripts/faddr2line vmlinux find_busiest_group+0x00 no match for find_busiest_group+0x00 On ppc64le, readelf adds the localentry tag before the symbol name on some distributions, and adds the localentry tag after the symbol name on other distributions. This problem has been discussed previously: https://lore.kernel.org/bpf/20191211160133.GB4580@calabresa/ This problem can be overcome by filtering out the localentry tags in the readelf output. Similar fixes are already present in the kernel by way of the following commits: 1fd6cee127e2 ("libbpf: Fix VERSIONED_SYM_COUNT number parsing") aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora") [jpoimboe: rework commit log] Fixes: 1d1a0e7c5100 ("scripts/faddr2line: Fix overlapping text section failures") Signed-off-by: Srikar Dronamraju Acked-by: Naveen N. Rao Reviewed-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20220927075211.897152-1-srikar@linux.vnet.ibm.com Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra --- scripts/faddr2line | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 5514c23f45c2..0e73aca4f908 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -74,7 +74,8 @@ command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" find_dir_prefix() { local objfile=$1 - local start_kernel_addr=$(${READELF} --symbols --wide $objfile | ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') + local start_kernel_addr=$(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | + ${AWK} '$8 == "start_kernel" {printf "0x%s", $2}') [[ -z $start_kernel_addr ]] && return local file_line=$(${ADDR2LINE} -e $objfile $start_kernel_addr) @@ -178,7 +179,7 @@ __faddr2line() { found=2 break fi - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v sec=$sym_sec '$7 == sec' | sort --key=2) if [[ $found = 0 ]]; then warn "can't find symbol: sym_name: $sym_name sym_sec: $sym_sec sym_addr: $sym_addr sym_elf_size: $sym_elf_size" @@ -259,7 +260,7 @@ __faddr2line() { DONE=1 - done < <(${READELF} --symbols --wide $objfile | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') + done < <(${READELF} --symbols --wide $objfile | sed 's/\[.*\]//' | ${AWK} -v fn=$sym_name '$4 == "FUNC" && $8 == fn') } [[ $# -lt 2 ]] && usage From eb761a1760bf30cf64e98ee8d914866e62ec9e8a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 16 Nov 2022 14:39:53 +1000 Subject: [PATCH 332/963] powerpc: Fix writable sections being moved into the rodata region .data.rel.ro* catches .data.rel.root_cpuacct, and the kernel crashes on a store in css_clear_dir. At least we know read-only data protection is working... Fixes: b6adc6d6d3272 ("powerpc/build: move .data.rel.ro, .sdata2 to read-only") Signed-off-by: Nicholas Piggin Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221116043954.3307852-1-npiggin@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7786e3ac7611..8c3862b4c259 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -142,7 +142,7 @@ SECTIONS #endif .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) { - *(.data.rel.ro*) + *(.data.rel.ro .data.rel.ro.*) } .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) { From a41a11b4009580edb6e2b4c76e5e2ee303f87157 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 27 Oct 2022 16:19:38 +0200 Subject: [PATCH 333/963] s390/dcssblk: fix deadlock when adding a DCSS After the rework from commit 1ebe2e5f9d68 ("block: remove GENHD_FL_EXT_DEVT"), when calling device_add_disk(), dcssblk will end up in disk_scan_partitions(), and not break out early w/o GENHD_FL_NO_PART. This will trigger implicit open/release via blkdev_get/put_whole() later. dcssblk_release() will then deadlock on dcssblk_devices_sem semaphore, which is already held from dcssblk_add_store() when calling device_add_disk(). dcssblk does not support partitions (DCSSBLK_MINORS_PER_DISK == 1), and never scanned partitions before. Therefore restore the previous behavior, and explicitly disallow partition scanning by setting the GENHD_FL_NO_PART flag. This will also prevent this deadlock scenario. Fixes: 1ebe2e5f9d68 ("block: remove GENHD_FL_EXT_DEVT") Cc: # 5.17+ Signed-off-by: Gerald Schaefer Acked-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- drivers/s390/block/dcssblk.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 93b80da60277..b392b9f5482e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -636,6 +636,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->minors = DCSSBLK_MINORS_PER_DISK; dev_info->gd->fops = &dcssblk_devops; dev_info->gd->private_data = dev_info; + dev_info->gd->flags |= GENHD_FL_NO_PART; blk_queue_logical_block_size(dev_info->gd->queue, 4096); blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->gd->queue); From e3c11025bcd2142a61abe5806b2f86a0e78118df Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 4 Nov 2022 12:06:47 +0100 Subject: [PATCH 334/963] s390: avoid using global register for current_stack_pointer Commit 30de14b1884b ("s390: current_stack_pointer shouldn't be a function") made current_stack_pointer a global register variable like on many other architectures. Unfortunately on s390 it uncovers old gcc bug which is fixed only since gcc-9.1 [gcc commit 3ad7fed1cc87 ("S/390: Fix PR89775. Stackpointer save/restore instructions removed")] and backported to gcc-8.4 and later. Due to this bug gcc versions prior to 8.4 generate broken code which leads to stack corruptions. Current minimal gcc version required to build the kernel is declared as 5.1. It is not possible to fix all old gcc versions, so work around this problem by avoiding using global register variable for current_stack_pointer. Fixes: 30de14b1884b ("s390: current_stack_pointer shouldn't be a function") Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/processor.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 87be3e855bf7..c907f747d2a0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -199,7 +199,16 @@ unsigned long __get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) -register unsigned long current_stack_pointer asm("r15"); +/* avoid using global register due to gcc bug in versions < 8.4 */ +#define current_stack_pointer (__current_stack_pointer()) + +static __always_inline unsigned long __current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("lgr %0,15" : "=d" (sp)); + return sp; +} static __always_inline unsigned short stap(void) { From c7d7d4e7bb1290cc473610b0bb96d9fa606d00e7 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 16 Nov 2022 17:03:18 +0800 Subject: [PATCH 335/963] ASoC: rt711-sdca: fix the latency time of clock stop prepare state machine transitions Due to the hardware behavior, it takes some time for CBJ detection/impedance sensing/de-bounce. The ClockStop_NotFinished flag will be raised until these functions are completed. In ClockStopMode0 mode case, the SdW controller might check this flag from D3 to D0 when the jack detection interrupt happened. Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20221116090318.5017-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711-sdca-sdw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c index 4120842fe699..88a8392a58ed 100644 --- a/sound/soc/codecs/rt711-sdca-sdw.c +++ b/sound/soc/codecs/rt711-sdca-sdw.c @@ -230,7 +230,7 @@ static int rt711_sdca_read_prop(struct sdw_slave *slave) } /* set the timeout values */ - prop->clk_stop_timeout = 20; + prop->clk_stop_timeout = 700; /* wake-up event */ prop->wake_capable = 1; From 60591bbf6d5eb44f275eb733943b7757325c1b60 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 16 Nov 2022 16:25:08 +0800 Subject: [PATCH 336/963] ASoC: max98373: Add checks for devm_kcalloc As the devm_kcalloc may return NULL pointer, it should be better to check the return value in order to avoid NULL poineter dereference. Fixes: 349dd23931d1 ("ASoC: max98373: don't access volatile registers in bias level off") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20221116082508.17418-1-jiasheng@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/codecs/max98373-i2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 3e04c7f0cce4..ec0905df65d1 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -549,6 +549,10 @@ static int max98373_i2c_probe(struct i2c_client *i2c) max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, sizeof(*max98373->cache), GFP_KERNEL); + if (!max98373->cache) { + ret = -ENOMEM; + return ret; + } for (i = 0; i < max98373->cache_num; i++) max98373->cache[i].reg = max98373_i2c_cache_reg[i]; From 5f4b204b6b8153923d5be8002c5f7082985d153f Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Wed, 16 Nov 2022 15:43:39 +0800 Subject: [PATCH 337/963] regulator: core: fix kobject release warning and memory leak in regulator_register() Here is a warning report about lack of registered release() from kobject lib: Device '(null)' does not have a release() function, it is broken and must be fixed. WARNING: CPU: 0 PID: 48430 at drivers/base/core.c:2332 device_release+0x104/0x120 Call Trace: kobject_put+0xdc/0x180 put_device+0x1b/0x30 regulator_register+0x651/0x1170 devm_regulator_register+0x4f/0xb0 When regulator_register() returns fail and directly goto `clean` symbol, rdev->dev has not registered release() function yet (which is registered by regulator_class in the following), so rdev needs to be freed manually. If rdev->dev.of_node is not NULL, which means the of_node has gotten by regulator_of_get_init_data(), it needs to call of_node_put() to avoid refcount leak. Otherwise, only calling put_device() would lead memory leak of rdev in further: unreferenced object 0xffff88810d0b1000 (size 2048): comm "107-i2c-rtq6752", pid 48430, jiffies 4342258431 (age 1341.780s) backtrace: kmalloc_trace+0x22/0x110 regulator_register+0x184/0x1170 devm_regulator_register+0x4f/0xb0 When regulator_register() returns fail and goto `wash` symbol, rdev->dev has registered release() function, so directly call put_device() to cleanup everything. Fixes: d3c731564e09 ("regulator: plug of_node leak in regulator_register()'s error path") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20221116074339.1024240-1-zengheng4@huawei.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 28879f8974d9..b3a0f6df91c0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5639,11 +5639,15 @@ regulator_register(const struct regulator_desc *regulator_desc, mutex_lock(®ulator_list_mutex); regulator_ena_gpio_free(rdev); mutex_unlock(®ulator_list_mutex); + put_device(&rdev->dev); + rdev = NULL; clean: if (dangling_of_gpiod) gpiod_put(config->ena_gpiod); + if (rdev && rdev->dev.of_node) + of_node_put(rdev->dev.of_node); + kfree(rdev); kfree(config); - put_device(&rdev->dev); rinse: if (dangling_cfg_gpiod) gpiod_put(cfg->ena_gpiod); From 7920e0fbced429ab18ad4402e3914146a6a0921b Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Nov 2022 17:29:43 +0800 Subject: [PATCH 338/963] regulator: rt5759: fix OOB in validate_desc() I got the following OOB report: BUG: KASAN: slab-out-of-bounds in validate_desc+0xba/0x109 Read of size 8 at addr ffff888107db8ff0 by task python3/253 Call Trace: dump_stack_lvl+0x67/0x83 print_report+0x178/0x4b0 kasan_report+0x90/0x190 validate_desc+0xba/0x109 gpiod_set_value_cansleep+0x40/0x5a regulator_ena_gpio_ctrl+0x93/0xfc _regulator_do_enable.cold.61+0x89/0x163 set_machine_constraints+0x140a/0x159c regulator_register.cold.73+0x762/0x10cd devm_regulator_register+0x57/0xb0 rt5759_probe+0x3a0/0x4ac [rt5759_regulator] The desc used in validate_desc() is passed from 'reg_cfg.ena_gpiod', which is not initialized. Fix this by initializing 'reg_cfg' to 0. Fixes: 7b36ddb208bd ("regulator: rt5759: Add support for Richtek RT5759 DCDC converter") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221116092943.1668326-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/regulator/rt5759-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/rt5759-regulator.c b/drivers/regulator/rt5759-regulator.c index 6b96899eb27e..8488417f4b2c 100644 --- a/drivers/regulator/rt5759-regulator.c +++ b/drivers/regulator/rt5759-regulator.c @@ -243,6 +243,7 @@ static int rt5759_regulator_register(struct rt5759_priv *priv) if (priv->chip_type == CHIP_TYPE_RT5759A) reg_desc->uV_step = RT5759A_STEP_UV; + memset(®_cfg, 0, sizeof(reg_cfg)); reg_cfg.dev = priv->dev; reg_cfg.of_node = np; reg_cfg.init_data = of_get_regulator_init_data(priv->dev, np, reg_desc); From db2d2dc9a0b58c6faefb6b002fdbed4f0362d1a4 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Tue, 15 Nov 2022 19:10:00 +0100 Subject: [PATCH 339/963] spi: spi-imx: Fix spi_bus_clk if requested clock is higher than input clock In case the requested bus clock is higher than the input clock, the correct dividers (pre = 0, post = 0) are returned from mx51_ecspi_clkdiv(), but *fres is left uninitialized and therefore contains an arbitrary value. This causes trouble for the recently introduced PIO polling feature as the value in spi_imx->spi_bus_clk is used there to calculate for which transfers to enable PIO polling. Fix this by setting *fres even if no clock dividers are in use. This issue was observed on Kontron BL i.MX8MM with an SPI peripheral clock set to 50 MHz by default and a requested SPI bus clock of 80 MHz for the SPI NOR flash. With the fix applied the debug message from mx51_ecspi_clkdiv() now prints the following: spi_imx 30820000.spi: mx51_ecspi_clkdiv: fin: 50000000, fspi: 50000000, post: 0, pre: 0 Fixes: 6fd8b8503a0d ("spi: spi-imx: Fix out-of-order CS/SCLK operation at low speeds") Fixes: 07e759387788 ("spi: spi-imx: add PIO polling support") Cc: Marc Kleine-Budde Cc: David Jander Cc: Fabio Estevam Cc: Mark Brown Cc: Marek Vasut Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Tested-by: Fabio Estevam Acked-by: Marek Vasut Link: https://lore.kernel.org/r/20221115181002.2068270-1-frieder@fris.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 30d82cc7300b..468ce0a2b282 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -444,8 +444,7 @@ static unsigned int mx51_ecspi_clkdiv(struct spi_imx_data *spi_imx, unsigned int pre, post; unsigned int fin = spi_imx->spi_clk; - if (unlikely(fspi > fin)) - return 0; + fspi = min(fspi, fin); post = fls(fin) - fls(fspi); if (fin > fspi << post) From b68777d54fac21fc833ec26ea1a2a84f975ab035 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 14 Nov 2022 20:16:19 +0100 Subject: [PATCH 340/963] l2tp: Serialize access to sk_user_data with sk_callback_lock sk->sk_user_data has multiple users, which are not compatible with each other. Writers must synchronize by grabbing the sk->sk_callback_lock. l2tp currently fails to grab the lock when modifying the underlying tunnel socket fields. Fix it by adding appropriate locking. We err on the side of safety and grab the sk_callback_lock also inside the sk_destruct callback overridden by l2tp, even though there should be no refs allowing access to the sock at the time when sk_destruct gets called. v4: - serialize write to sk_user_data in l2tp sk_destruct v3: - switch from sock lock to sk_callback_lock - document write-protection for sk_user_data v2: - update Fixes to point to origin of the bug - use real names in Reported/Tested-by tags Cc: Tom Parkin Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Haowei Yan Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- net/l2tp/l2tp_core.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 5db02546941c..e0517ecc6531 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -323,7 +323,7 @@ struct sk_filter; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7499c51b1850..754fdda8a5f5 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1469,16 +1471,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock(&sk->sk_callback_lock); + + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1504,7 +1508,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } else { - sk->sk_user_data = tunnel; + rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1518,6 +1522,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); + write_unlock(&sk->sk_callback_lock); return 0; err_sock: @@ -1525,6 +1530,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock_release(sock); else sockfd_put(sock); + + write_unlock(&sk->sk_callback_lock); err: return ret; } From 39a72dbfe188291b156dd6523511e3d5761ce775 Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Fri, 28 Oct 2022 09:37:40 +0200 Subject: [PATCH 341/963] mmc: core: properly select voltage range without power cycle In mmc_select_voltage(), if there is no full power cycle, the voltage range selected at the end of the function will be on a single range (e.g. 3.3V/3.4V). To keep a range around the selected voltage (3.2V/3.4V), the mask shift should be reduced by 1. This issue was triggered by using a specific SD-card (Verbatim Premium 16GB UHS-1) on an STM32MP157C-DK2 board. This board cannot do UHS modes and there is no power cycle. And the card was failing to switch to high-speed mode. When adding the range 3.2V/3.3V for this card with the proposed shift change, the card can switch to high-speed mode. Fixes: ce69d37b7d8f ("mmc: core: Prevent violation of specs while initializing cards") Signed-off-by: Yann Gautier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221028073740.7259-1-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 95fa8fb1d45f..c5de202f530a 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1134,7 +1134,13 @@ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr) mmc_power_cycle(host, ocr); } else { bit = fls(ocr) - 1; - ocr &= 3 << bit; + /* + * The bit variable represents the highest voltage bit set in + * the OCR register. + * To keep a range of 2 values (e.g. 3.2V/3.3V and 3.3V/3.4V), + * we must shift the mask '3' with (bit - 1). + */ + ocr &= 3 << (bit - 1); if (bit != host->ios.vdd) dev_warn(mmc_dev(host), "exceeding card's volts\n"); } From 096cc0cddf58232bded309336961784f1d1c85f8 Mon Sep 17 00:00:00 2001 From: Chevron Li Date: Fri, 4 Nov 2022 02:55:12 -0700 Subject: [PATCH 342/963] mmc: sdhci-pci-o2micro: fix card detect fail issue caused by CD# debounce timeout The SD card is recognized failed sometimes when resume from suspend. Because CD# debounce time too long then card present report wrong. Finally, card is recognized failed. Signed-off-by: Chevron Li Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221104095512.4068-1-chevron.li@bayhubtech.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-o2micro.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index ad457cd9cbaa..bca1d095b759 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -32,6 +32,7 @@ #define O2_SD_CAPS 0xE0 #define O2_SD_ADMA1 0xE2 #define O2_SD_ADMA2 0xE7 +#define O2_SD_MISC_CTRL2 0xF0 #define O2_SD_INF_MOD 0xF1 #define O2_SD_MISC_CTRL4 0xFC #define O2_SD_MISC_CTRL 0x1C0 @@ -877,6 +878,12 @@ static int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip) /* Set Tuning Windows to 5 */ pci_write_config_byte(chip->pdev, O2_SD_TUNING_CTRL, 0x55); + //Adjust 1st and 2nd CD debounce time + pci_read_config_dword(chip->pdev, O2_SD_MISC_CTRL2, &scratch_32); + scratch_32 &= 0xFFE7FFFF; + scratch_32 |= 0x00180000; + pci_write_config_dword(chip->pdev, O2_SD_MISC_CTRL2, scratch_32); + pci_write_config_dword(chip->pdev, O2_SD_DETECT_SETTING, 1); /* Lock WP */ ret = pci_read_config_byte(chip->pdev, O2_SD_LOCK_WP, &scratch); From aeac4ec8f46d610a10adbaeff5e2edf6a88ffc62 Mon Sep 17 00:00:00 2001 From: Gleb Mazovetskiy Date: Mon, 14 Nov 2022 22:56:16 +0000 Subject: [PATCH 343/963] tcp: configurable source port perturb table size On embedded systems with little memory and no relevant security concerns, it is beneficial to reduce the size of the table. Reducing the size from 2^16 to 2^8 saves 255 KiB of kernel RAM. Makes the table size configurable as an expert option. The size was previously increased from 2^8 to 2^16 in commit 4c2c8f03a5ab ("tcp: increase source port perturb table to 2^16"). Signed-off-by: Gleb Mazovetskiy Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 10 ++++++++++ net/ipv4/inet_hashtables.c | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e983bb0c5012..2dfb12230f08 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -402,6 +402,16 @@ config INET_IPCOMP If unsure, say Y. +config INET_TABLE_PERTURB_ORDER + int "INET: Source port perturbation table size (as power of 2)" if EXPERT + default 16 + help + Source port perturbation table size (as power of 2) for + RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm. + + The default is almost always what you want. + Only change this if you know what you are doing. + config INET_XFRM_TUNNEL tristate select INET_TUNNEL diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d3dc28156622..033bf3c2538f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -906,13 +906,13 @@ EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. + * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though - * attacks were since demonstrated, thus we use 65536 instead to really - * give more isolation and privacy, at the expense of 256kB of kernel - * memory. + * attacks were since demonstrated, thus we use 65536 by default instead + * to really give more isolation and privacy, at the expense of 256kB + * of kernel memory. */ -#define INET_TABLE_PERTURB_SHIFT 16 -#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) +#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER) static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, From 804313b64e412a81b0b3389a10e7622452004aa6 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Wed, 16 Nov 2022 17:32:04 +0800 Subject: [PATCH 344/963] spi: dw-dma: decrease reference count in dw_spi_dma_init_mfld() pci_get_device() will increase the reference count for the returned pci_dev. Since 'dma_dev' is only used to filter the channel in dw_spi_dma_chan_filer() after using it we need to call pci_dev_put() to decrease the reference count. Also add pci_dev_put() for the error case. Fixes: 7063c0d942a1 ("spi/dw_spi: add DMA support") Signed-off-by: Xiongfeng Wang Acked-by: Serge Semin Link: https://lore.kernel.org/r/20221116093204.46700-1-wangxiongfeng2@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-dw-dma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c index 1322b8cce5b7..ababb910b391 100644 --- a/drivers/spi/spi-dw-dma.c +++ b/drivers/spi/spi-dw-dma.c @@ -128,12 +128,15 @@ static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) dw_spi_dma_sg_burst_init(dws); + pci_dev_put(dma_dev); + return 0; free_rxchan: dma_release_channel(dws->rxchan); dws->rxchan = NULL; err_exit: + pci_dev_put(dma_dev); return -EBUSY; } From 1f386d6894d0f1b7de8ef640c41622ddd698e7ab Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 16 Nov 2022 11:37:06 +0800 Subject: [PATCH 345/963] regulator: core: fix UAF in destroy_regulator() I got a UAF report as following: ================================================================== BUG: KASAN: use-after-free in __lock_acquire+0x935/0x2060 Read of size 8 at addr ffff88810e838220 by task python3/268 Call Trace: dump_stack_lvl+0x67/0x83 print_report+0x178/0x4b0 kasan_report+0x90/0x190 __lock_acquire+0x935/0x2060 lock_acquire+0x156/0x400 _raw_spin_lock+0x2a/0x40 lockref_get+0x11/0x30 simple_recursive_removal+0x41/0x440 debugfs_remove.part.12+0x32/0x50 debugfs_remove+0x29/0x30 _regulator_put.cold.54+0x3e/0x27f regulator_put+0x1f/0x30 release_nodes+0x6a/0xa0 devres_release_all+0xf8/0x150 Allocated by task 37: kasan_save_stack+0x1c/0x40 kasan_set_track+0x21/0x30 __kasan_slab_alloc+0x5d/0x70 slab_post_alloc_hook+0x62/0x510 kmem_cache_alloc_lru+0x222/0x5a0 __d_alloc+0x31/0x440 d_alloc+0x30/0xf0 d_alloc_parallel+0xc4/0xd20 __lookup_slow+0x15e/0x2f0 lookup_one_len+0x13a/0x150 start_creating+0xea/0x190 debugfs_create_dir+0x1e/0x210 create_regulator+0x254/0x4e0 _regulator_get+0x2a1/0x467 _devm_regulator_get+0x5a/0xb0 regulator_virtual_probe+0xb9/0x1a0 Freed by task 30: kasan_save_stack+0x1c/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x102/0x190 kmem_cache_free+0xf6/0x600 rcu_core+0x54c/0x12b0 __do_softirq+0xf2/0x5e3 Last potentially related work creation: kasan_save_stack+0x1c/0x40 __kasan_record_aux_stack+0x98/0xb0 call_rcu+0x42/0x700 dentry_free+0x6c/0xd0 __dentry_kill+0x23b/0x2d0 dput.part.31+0x431/0x780 simple_recursive_removal+0xa9/0x440 debugfs_remove.part.12+0x32/0x50 debugfs_remove+0x29/0x30 regulator_unregister+0xe3/0x230 release_nodes+0x6a/0xa0 ================================================================== Here is how happened: processor A processor B regulator_register() rdev_init_debugfs() rdev->debugfs = debugfs_create_dir() devm_regulator_get() rdev = regulator_dev_lookup() create_regulator(rdev) // using rdev->debugfs as parent debugfs_create_dir(rdev->debugfs) mfd_remove_devices_fn() release_nodes() regulator_unregister() // free rdev->debugfs debugfs_remove_recursive(rdev->debugfs) release_nodes() destroy_regulator() debugfs_remove_recursive() <- causes UAF In devm_regulator_get(), after getting rdev, the refcount is get, so fix this by moving debugfs_remove_recursive() to regulator_dev_release(), then it can be proctected by the refcount, the 'rdev->debugfs' can not be freed until the refcount is 0. Fixes: 5de705194e98 ("regulator: Add basic per consumer debugfs") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221116033706.3595812-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b3a0f6df91c0..ec626bda79d6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5149,6 +5149,7 @@ static void regulator_dev_release(struct device *dev) { struct regulator_dev *rdev = dev_get_drvdata(dev); + debugfs_remove_recursive(rdev->debugfs); kfree(rdev->constraints); of_node_put(rdev->dev.of_node); kfree(rdev); @@ -5676,7 +5677,6 @@ void regulator_unregister(struct regulator_dev *rdev) mutex_lock(®ulator_list_mutex); - debugfs_remove_recursive(rdev->debugfs); WARN_ON(rdev->open_count); regulator_remove_coupling(rdev); unset_regulator_supplies(rdev); From 222cfa0118aa68687ace74aab8fdf77ce8fbd7e6 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Mon, 14 Nov 2022 16:31:00 +0800 Subject: [PATCH 346/963] mmc: sdhci-pci: Fix possible memory leak caused by missing pci_dev_put() pci_get_device() will increase the reference count for the returned pci_dev. We need to use pci_dev_put() to decrease the reference count before amd_probe() returns. There is no problem for the 'smbus_dev == NULL' branch because pci_dev_put() can also handle the NULL input parameter case. Fixes: 659c9bc114a8 ("mmc: sdhci-pci: Build o2micro support in the same module") Signed-off-by: Xiongfeng Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221114083100.149200-1-wangxiongfeng2@huawei.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 34ea1acbb3cc..28dc65023fa9 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1749,6 +1749,8 @@ static int amd_probe(struct sdhci_pci_chip *chip) } } + pci_dev_put(smbus_dev); + if (gen == AMD_CHIPSET_BEFORE_ML || gen == AMD_CHIPSET_CZ) chip->quirks2 |= SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD; From 2ec106b96afc19698ff934323b633c0729d4c7f8 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:48 +0100 Subject: [PATCH 347/963] USB: serial: option: remove old LARA-R6 PID Remove the UBLOX_PRODUCT_R6XX 0x90fa association since LARA-R6 00B final product uses a new USB composition with different PID. 0x90fa PID used only by LARA-R6 internal prototypes. Move 0x90fa PID directly in the option_ids array since used by other Qualcomm based modem vendors as pointed out in: https://lore.kernel.org/all/6572c4e6-d8bc-b8d3-4396-d879e4e76338@gmail.com Signed-off-by: Davide Tronchin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2a0d8fc9af91..e99a216bf646 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -240,7 +240,6 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 -#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1127,7 +1126,7 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, - { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), From d9e37a5c4d80ea25a7171ab8557a449115554e76 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:49 +0100 Subject: [PATCH 348/963] USB: serial: option: add u-blox LARA-R6 00B modem The official LARA-R6 (00B) modem uses 0x908b PID. LARA-R6 00B does not implement a QMI interface on port 4, the reservation (RSVD(4)) has been added to meet other companies that implement QMI on that interface. LARA-R6 00B USB composition exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions Signed-off-by: Davide Tronchin Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e99a216bf646..f6babe5a3a28 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1126,6 +1126,8 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x908b), /* u-blox LARA-R6 00B */ + .driver_info = RSVD(4) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ From c1547f12df8b8e9ca2686accee43213ecd117efe Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Wed, 16 Nov 2022 16:59:50 +0100 Subject: [PATCH 349/963] USB: serial: option: add u-blox LARA-L6 modem Add LARA-L6 PIDs for three different USB compositions. LARA-L6 module can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial interface and 1 CDC-ECM virtual network interface In default mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions In RmNet mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parset/alternative functions If 4: RMNET interface In CDC-ECM mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parset/alternative functions If 4: CDC-ECM interface Signed-off-by: Davide Tronchin [ johan: drop PID defines in favour of comments ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index f6babe5a3a28..c3b7f1d98e78 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,8 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 +#define UBLOX_VENDOR_ID 0x1546 + /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 #define AMOI_PRODUCT_H01 0x0800 @@ -1130,6 +1132,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, + /* u-blox products */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ + .driver_info = RSVD(4) }, + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1343), /* u-blox LARA-L6 (ECM) */ + .driver_info = RSVD(4) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, From f032c7ae4502f417939c84a1ca27766f5a2ad69f Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Tue, 15 Nov 2022 15:32:17 -0800 Subject: [PATCH 350/963] ANDROID: kleaf: Move list of kernel modules to an extension. This is so that it can be loaded by other packages. Some device's may wish to not build mixed build, in which case they'll either need to use 'auto' in module_outs (discouraged), or load the list of GKI modules. Test: TH Bug: 258308697 Change-Id: Iff7778b9edae7101a2debd44a49bb5eb403bfb2f Signed-off-by: Yifan Hong --- BUILD.bazel | 38 ++++++-------------------------------- modules.bzl | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 32 deletions(-) create mode 100644 modules.bzl diff --git a/BUILD.bazel b/BUILD.bazel index 9e9183b58793..e59de4867742 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -19,51 +19,25 @@ package( load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c") load("//build/kernel/kleaf:kernel.bzl", "ddk_headers") - -_common_gki_modules_list = [ - # keep sorted - "drivers/block/zram/zram.ko", - "drivers/bluetooth/btbcm.ko", - "drivers/bluetooth/btqca.ko", - "drivers/bluetooth/btsdio.ko", - "drivers/bluetooth/hci_uart.ko", - "drivers/net/can/dev/can-dev.ko", - "drivers/net/can/slcan/slcan.ko", - "drivers/net/can/vcan.ko", - "drivers/usb/class/cdc-acm.ko", - "drivers/usb/serial/ftdi_sio.ko", - "drivers/usb/serial/usbserial.ko", - "mm/zsmalloc.ko", - "net/8021q/8021q.ko", - "net/bluetooth/hidp/hidp.ko", - "net/bluetooth/rfcomm/rfcomm.ko", - "net/can/can-bcm.ko", - "net/can/can-gw.ko", - "net/can/can-raw.ko", - "net/mac80211/mac80211.ko", - "net/nfc/nfc.ko", - "net/tipc/diag.ko", - "net/tipc/tipc.ko", - "net/wireless/cfg80211.ko", -] +load(":modules.bzl", "COMMON_GKI_MODULES_LIST") define_common_kernels(target_configs = { "kernel_aarch64": { - "module_implicit_outs": _common_gki_modules_list, + "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, "kernel_aarch64_16k": { - "module_implicit_outs": _common_gki_modules_list, + "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, "kernel_aarch64_debug": { - "module_implicit_outs": _common_gki_modules_list, + "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, "kernel_x86_64": { "kmi_symbol_list_strict_mode": False, - "module_implicit_outs": _common_gki_modules_list, + "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, "kernel_x86_64_debug": { "kmi_symbol_list_strict_mode": False, - "module_implicit_outs": _common_gki_modules_list, + "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, }) diff --git a/modules.bzl b/modules.bzl new file mode 100644 index 000000000000..199dbc681efc --- /dev/null +++ b/modules.bzl @@ -0,0 +1,26 @@ +COMMON_GKI_MODULES_LIST = [ + # keep sorted + "drivers/block/zram/zram.ko", + "drivers/bluetooth/btbcm.ko", + "drivers/bluetooth/btqca.ko", + "drivers/bluetooth/btsdio.ko", + "drivers/bluetooth/hci_uart.ko", + "drivers/net/can/dev/can-dev.ko", + "drivers/net/can/slcan/slcan.ko", + "drivers/net/can/vcan.ko", + "drivers/usb/class/cdc-acm.ko", + "drivers/usb/serial/ftdi_sio.ko", + "drivers/usb/serial/usbserial.ko", + "mm/zsmalloc.ko", + "net/8021q/8021q.ko", + "net/bluetooth/hidp/hidp.ko", + "net/bluetooth/rfcomm/rfcomm.ko", + "net/can/can-bcm.ko", + "net/can/can-gw.ko", + "net/can/can-raw.ko", + "net/mac80211/mac80211.ko", + "net/nfc/nfc.ko", + "net/tipc/diag.ko", + "net/tipc/tipc.ko", + "net/wireless/cfg80211.ko", +] From a51e5d293dd1c2e7bf6f7be788466cd9b5d280fb Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Wed, 16 Nov 2022 17:10:27 +0300 Subject: [PATCH 351/963] cifs: add check for returning value of SMB2_set_info_init If the returning value of SMB2_set_info_init is an error-value, exit the function. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 0967e5457954 ("cifs: use a compound for setting an xattr") Signed-off-by: Anastasia Belova Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 9737296c0fbc..bfaafd02fb1f 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1116,6 +1116,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, COMPOUND_FID, current->tgid, FILE_FULL_EA_INFORMATION, SMB2_O_INFO_FILE, 0, data, size); + if (rc) + goto sea_exit; smb2_set_next_command(tcon, &rqst[1]); smb2_set_related(&rqst[1]); From 5f4696ddca4b8a0bbbc36bd46829f97aab5a4552 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 19:57:47 -0600 Subject: [PATCH 352/963] bus: sunxi-rsb: Remove the shutdown callback Shutting down the RSB controller prevents communicating with a PMIC inside pm_power_off(), since that gets called after device_shutdown(), so it breaks system poweroff on some boards. Reported-by: Ivaylo Dimitrov Tested-by: Ivaylo Dimitrov Acked-by: Jernej Skrabec Fixes: 843107498f91 ("bus: sunxi-rsb: Implement suspend/resume/shutdown callbacks") Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221114015749.28490-2-samuel@sholland.org Signed-off-by: Jernej Skrabec --- drivers/bus/sunxi-rsb.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 4cd2e127946e..17343cd75338 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -812,14 +812,6 @@ static int sunxi_rsb_remove(struct platform_device *pdev) return 0; } -static void sunxi_rsb_shutdown(struct platform_device *pdev) -{ - struct sunxi_rsb *rsb = platform_get_drvdata(pdev); - - pm_runtime_disable(&pdev->dev); - sunxi_rsb_hw_exit(rsb); -} - static const struct dev_pm_ops sunxi_rsb_dev_pm_ops = { SET_RUNTIME_PM_OPS(sunxi_rsb_runtime_suspend, sunxi_rsb_runtime_resume, NULL) @@ -835,7 +827,6 @@ MODULE_DEVICE_TABLE(of, sunxi_rsb_of_match_table); static struct platform_driver sunxi_rsb_driver = { .probe = sunxi_rsb_probe, .remove = sunxi_rsb_remove, - .shutdown = sunxi_rsb_shutdown, .driver = { .name = RSB_CTRL_NAME, .of_match_table = sunxi_rsb_of_match_table, From 077686da0e2162c4ea5ae0df205849c2a7a84479 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sun, 13 Nov 2022 19:57:48 -0600 Subject: [PATCH 353/963] bus: sunxi-rsb: Support atomic transfers When communicating with a PMIC during system poweroff (pm_power_off()), IRQs are disabled and we are in a RCU read-side critical section, so we cannot use wait_for_completion_io_timeout(). Instead, poll the status register for transfer completion. Fixes: d787dcdb9c8f ("bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus") Signed-off-by: Samuel Holland Reviewed-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221114015749.28490-3-samuel@sholland.org Signed-off-by: Jernej Skrabec --- drivers/bus/sunxi-rsb.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/bus/sunxi-rsb.c b/drivers/bus/sunxi-rsb.c index 17343cd75338..3aa91aed3bf7 100644 --- a/drivers/bus/sunxi-rsb.c +++ b/drivers/bus/sunxi-rsb.c @@ -267,6 +267,9 @@ EXPORT_SYMBOL_GPL(sunxi_rsb_driver_register); /* common code that starts a transfer */ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) { + u32 int_mask, status; + bool timeout; + if (readl(rsb->regs + RSB_CTRL) & RSB_CTRL_START_TRANS) { dev_dbg(rsb->dev, "RSB transfer still in progress\n"); return -EBUSY; @@ -274,13 +277,23 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) reinit_completion(&rsb->complete); - writel(RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER, - rsb->regs + RSB_INTE); + int_mask = RSB_INTS_LOAD_BSY | RSB_INTS_TRANS_ERR | RSB_INTS_TRANS_OVER; + writel(int_mask, rsb->regs + RSB_INTE); writel(RSB_CTRL_START_TRANS | RSB_CTRL_GLOBAL_INT_ENB, rsb->regs + RSB_CTRL); - if (!wait_for_completion_io_timeout(&rsb->complete, - msecs_to_jiffies(100))) { + if (irqs_disabled()) { + timeout = readl_poll_timeout_atomic(rsb->regs + RSB_INTS, + status, (status & int_mask), + 10, 100000); + writel(status, rsb->regs + RSB_INTS); + } else { + timeout = !wait_for_completion_io_timeout(&rsb->complete, + msecs_to_jiffies(100)); + status = rsb->status; + } + + if (timeout) { dev_dbg(rsb->dev, "RSB timeout\n"); /* abort the transfer */ @@ -292,18 +305,18 @@ static int _sunxi_rsb_run_xfer(struct sunxi_rsb *rsb) return -ETIMEDOUT; } - if (rsb->status & RSB_INTS_LOAD_BSY) { + if (status & RSB_INTS_LOAD_BSY) { dev_dbg(rsb->dev, "RSB busy\n"); return -EBUSY; } - if (rsb->status & RSB_INTS_TRANS_ERR) { - if (rsb->status & RSB_INTS_TRANS_ERR_ACK) { + if (status & RSB_INTS_TRANS_ERR) { + if (status & RSB_INTS_TRANS_ERR_ACK) { dev_dbg(rsb->dev, "RSB slave nack\n"); return -EINVAL; } - if (rsb->status & RSB_INTS_TRANS_ERR_DATA) { + if (status & RSB_INTS_TRANS_ERR_DATA) { dev_dbg(rsb->dev, "RSB transfer data error\n"); return -EIO; } From 38f0d57d0a47b2783cac4d78a6141c7abf423532 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Nov 2022 17:06:43 +0800 Subject: [PATCH 354/963] media: dt-bindings: allwinner: h6-vpu-g2: Add IOMMU reference property The Hantro G2 video decoder block sits behind an IOMMU. The device tree binding needs a property to reference it. Without a reference for the implementation to properly configure the IOMMU, it will fault and cause the video decoder to fail. Add an "iommus" property for referring to the IOMMU port. The master ID in the example is taken from the IOMMU fault error message on Linux, and the number seems to match the order in the user manual's IOMMU diagram. Fixes: fd6be12716c4 ("media: dt-bindings: allwinner: document H6 Hantro G2 binding") Signed-off-by: Chen-Yu Tsai Acked-by: Jernej Skrabec Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221115090644.3602573-2-wenst@chromium.org Signed-off-by: Jernej Skrabec --- .../bindings/media/allwinner,sun50i-h6-vpu-g2.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml index 24d7bf21499e..9d44236f2deb 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun50i-h6-vpu-g2.yaml @@ -36,6 +36,9 @@ properties: resets: maxItems: 1 + iommus: + maxItems: 1 + required: - compatible - reg @@ -43,6 +46,7 @@ required: - clocks - clock-names - resets + - iommus additionalProperties: false @@ -59,6 +63,7 @@ examples: clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; ... From 50edc257a152541b8bcdc84f77de5e4efad7013d Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 15 Nov 2022 17:06:44 +0800 Subject: [PATCH 355/963] arm64: dts: allwinner: h6: Add IOMMU reference to Hantro G2 The Hantro G2 video decoder block sits behind the IOMMU. Without a reference for the system to properly configure the IOMMU, it will fault and cause the video decoder to fail. Add a proper reference to the IOMMU port. The master ID is taken from the IOMMU fault error message on Linux, and the number seems to match the order in the user manual's IOMMU diagram. Fixes: 0baddea60e8d ("arm64: dts: allwinner: h6: Add Hantro G2 node") Signed-off-by: Chen-Yu Tsai Acked-by: Jernej Skrabec Link: https://lore.kernel.org/r/20221115090644.3602573-3-wenst@chromium.org Signed-off-by: Jernej Skrabec --- arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 53f6660656ac..ca1d287a0a01 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -161,6 +161,7 @@ video-codec-g2@1c00000 { clocks = <&ccu CLK_BUS_VP9>, <&ccu CLK_VP9>; clock-names = "bus", "mod"; resets = <&ccu RST_BUS_VP9>; + iommus = <&iommu 5>; }; video-codec@1c0e000 { From 3f4a3cdd1fc79ed0fe680e6e5222bcd1fb9b6304 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Wed, 16 Nov 2022 06:57:55 +0000 Subject: [PATCH 356/963] ANDROID: GKI: Convert Bluetooth Support as module Converts CONFIG_BT: Bluetooth subsystem support as GKI modules. CRYPTO_CMAC & CRYPTO_ECDH are being added as CONFIG_BT is selecting them i.e. they were enabled but not visibile in savedefconfig when BT was an in-built feature and becomes visible in savedefconfig when BT is a module. Bug: 232431151 Test: TH Change-Id: I624939d007fadbf852ad53c4404df12ac769fcd8 Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 4 +++- arch/x86/configs/gki_defconfig | 4 +++- modules.bzl | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index ecef27a870e2..e72658954e35 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -11,6 +11,7 @@ drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko mm/zsmalloc.ko net/8021q/8021q.ko +net/bluetooth/bluetooth.ko net/bluetooth/hidp/hidp.ko net/bluetooth/rfcomm/rfcomm.ko net/can/can-bcm.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 797f6ae63705..9963c62e851f 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -257,7 +257,7 @@ CONFIG_CAN=y CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m CONFIG_CAN_GW=m -CONFIG_BT=y +CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=m @@ -638,11 +638,13 @@ CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_CRYPTO_ECDH=y CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_CHACHA20POLY1305=y CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_CMAC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_LZO=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index f6cfed0bf282..66bd0e5df1e3 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -247,7 +247,7 @@ CONFIG_CAN=y CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m CONFIG_CAN_GW=m -CONFIG_BT=y +CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=m @@ -581,11 +581,13 @@ CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y +CONFIG_CRYPTO_ECDH=y CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_CHACHA20POLY1305=y CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_CMAC=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_XCBC=y CONFIG_CRYPTO_LZO=y diff --git a/modules.bzl b/modules.bzl index 199dbc681efc..d58218c25336 100644 --- a/modules.bzl +++ b/modules.bzl @@ -13,6 +13,7 @@ COMMON_GKI_MODULES_LIST = [ "drivers/usb/serial/usbserial.ko", "mm/zsmalloc.ko", "net/8021q/8021q.ko", + "net/bluetooth/bluetooth.ko", "net/bluetooth/hidp/hidp.ko", "net/bluetooth/rfcomm/rfcomm.ko", "net/can/can-bcm.ko", From 18a522d05af3d9c4e58d2bcdf6f319c096597a8c Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Wed, 16 Nov 2022 07:41:29 +0000 Subject: [PATCH 357/963] ANDROID: GKI: Convert RF switch subsys as module Convert RF swithc subsys as GKI module. CONFIG_RFKILL: RF switch subsystem support To have control over RF switches found on many WiFi and Bluetooth cards. Bug: 232431151 Test: TH Change-Id: I6fb84f82d7a65b3d4f17261d361be1c44c6b77bb Signed-off-by: Ramji Jiyani (cherry picked from commit 6a0f9b1a5aa6f2377f6de01471c07ed550137b42) --- android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 2 +- arch/x86/configs/gki_defconfig | 2 +- modules.bzl | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index e72658954e35..2a3cd4a42882 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -19,6 +19,7 @@ net/can/can-gw.ko net/can/can-raw.ko net/mac80211/mac80211.ko net/nfc/nfc.ko +net/rfkill/rfkill.ko net/tipc/diag.ko net/tipc/tipc.ko net/wireless/cfg80211.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 9963c62e851f..569e0b7f358c 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -271,7 +271,7 @@ CONFIG_NL80211_TESTMODE=y # CONFIG_CFG80211_DEFAULT_PS is not set # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_MAC80211=m -CONFIG_RFKILL=y +CONFIG_RFKILL=m CONFIG_NFC=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 66bd0e5df1e3..8dc799d20796 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -261,7 +261,7 @@ CONFIG_NL80211_TESTMODE=y # CONFIG_CFG80211_DEFAULT_PS is not set # CONFIG_CFG80211_CRDA_SUPPORT is not set CONFIG_MAC80211=m -CONFIG_RFKILL=y +CONFIG_RFKILL=m CONFIG_NFC=m CONFIG_PCI=y CONFIG_PCIEPORTBUS=y diff --git a/modules.bzl b/modules.bzl index d58218c25336..a5a8dacdb5fd 100644 --- a/modules.bzl +++ b/modules.bzl @@ -21,6 +21,7 @@ COMMON_GKI_MODULES_LIST = [ "net/can/can-raw.ko", "net/mac80211/mac80211.ko", "net/nfc/nfc.ko", + "net/rfkill/rfkill.ko", "net/tipc/diag.ko", "net/tipc/tipc.ko", "net/wireless/cfg80211.ko", From 064bc7312bd09a48798418663090be0c776183db Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 15 Nov 2022 17:30:25 +0800 Subject: [PATCH 358/963] netdevsim: Fix memory leak of nsim_dev->fa_cookie kmemleak reports this issue: unreferenced object 0xffff8881bac872d0 (size 8): comm "sh", pid 58603, jiffies 4481524462 (age 68.065s) hex dump (first 8 bytes): 04 00 00 00 de ad be ef ........ backtrace: [<00000000c80b8577>] __kmalloc+0x49/0x150 [<000000005292b8c6>] nsim_dev_trap_fa_cookie_write+0xc1/0x210 [netdevsim] [<0000000093d78e77>] full_proxy_write+0xf3/0x180 [<000000005a662c16>] vfs_write+0x1c5/0xaf0 [<000000007aabf84a>] ksys_write+0xed/0x1c0 [<000000005f1d2e47>] do_syscall_64+0x3b/0x90 [<000000006001c6ec>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: nsim_dev_trap_fa_cookie_write() kmalloc() fa_cookie nsim_dev->fa_cookie = fa_cookie .. nsim_drv_remove() The fa_cookie allocked in nsim_dev_trap_fa_cookie_write() is not freed. To fix, add kfree(nsim_dev->fa_cookie) to nsim_drv_remove(). Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Wang Yufen Cc: Jiri Pirko Link: https://lore.kernel.org/r/1668504625-14698-1-git-send-email-wangyufen@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index a7880c7ce94c..68e56e451b2b 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1683,6 +1683,7 @@ void nsim_drv_remove(struct nsim_bus_dev *nsim_bus_dev) ARRAY_SIZE(nsim_devlink_params)); devl_resources_unregister(devlink); kfree(nsim_dev->vfconfigs); + kfree(nsim_dev->fa_cookie); devl_unlock(devlink); devlink_free(devlink); dev_set_drvdata(&nsim_bus_dev->dev, NULL); From bbf0d78099f7f7393e137abec0d411436cd0c67a Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Tue, 15 Nov 2022 23:01:00 -0800 Subject: [PATCH 359/963] ANDROID: convert rockpi4 to kleaf. Use Kleaf to build rockpi4. This does not enable mixed build. Mixed build is enabled in follow-up CLs. Test: bazel build //common:rockpi4_dist Bug: 258259749 Change-Id: I4828953495c862afe517fff48195d2f6a97e3fcf Signed-off-by: Yifan Hong --- BUILD.bazel | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/BUILD.bazel b/BUILD.bazel index e59de4867742..8967b61d5d91 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -17,8 +17,9 @@ package( ], ) +load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir") load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c") -load("//build/kernel/kleaf:kernel.bzl", "ddk_headers") +load("//build/kernel/kleaf:kernel.bzl", "ddk_headers", "kernel_build", "kernel_images", "kernel_modules_install") load(":modules.bzl", "COMMON_GKI_MODULES_LIST") define_common_kernels(target_configs = { @@ -234,6 +235,95 @@ define_db845c( ], ) +# TODO(b/258259749): Convert rockpi4 to mixed build +kernel_build( + name = "rockpi4", + outs = [ + "Image", + "System.map", + "modules.builtin", + "modules.builtin.modinfo", + "rk3399-rock-pi-4b.dtb", + "vmlinux", + "vmlinux.symvers", + ], + build_config = "build.config.rockpi4", + dtstree = "//common-modules/virtual-device:rockpi4_dts", + module_outs = COMMON_GKI_MODULES_LIST + [ + # keep sorted + "drivers/block/virtio_blk.ko", + "drivers/char/hw_random/virtio-rng.ko", + "drivers/clk/clk-rk808.ko", + "drivers/cpufreq/cpufreq-dt.ko", + "drivers/dma/pl330.ko", + "drivers/gpu/drm/bridge/analogix/analogix_dp.ko", + "drivers/gpu/drm/bridge/synopsys/dw-hdmi.ko", + "drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.ko", + "drivers/gpu/drm/display/drm_display_helper.ko", + "drivers/gpu/drm/drm_dma_helper.ko", + "drivers/gpu/drm/rockchip/rockchipdrm.ko", + "drivers/i2c/busses/i2c-rk3x.ko", + "drivers/iio/adc/rockchip_saradc.ko", + "drivers/iio/buffer/industrialio-triggered-buffer.ko", + "drivers/iio/buffer/kfifo_buf.ko", + "drivers/mfd/rk808.ko", + "drivers/mmc/core/pwrseq_simple.ko", + "drivers/mmc/host/cqhci.ko", + "drivers/mmc/host/dw_mmc.ko", + "drivers/mmc/host/dw_mmc-pltfm.ko", + "drivers/mmc/host/dw_mmc-rockchip.ko", + "drivers/mmc/host/sdhci-of-arasan.ko", + "drivers/net/ethernet/stmicro/stmmac/dwmac-rk.ko", + "drivers/net/ethernet/stmicro/stmmac/stmmac.ko", + "drivers/net/ethernet/stmicro/stmmac/stmmac-platform.ko", + "drivers/net/net_failover.ko", + "drivers/net/pcs/pcs_xpcs.ko", + "drivers/net/virtio_net.ko", + "drivers/pci/controller/pcie-rockchip-host.ko", + "drivers/phy/rockchip/phy-rockchip-emmc.ko", + "drivers/phy/rockchip/phy-rockchip-inno-usb2.ko", + "drivers/phy/rockchip/phy-rockchip-pcie.ko", + "drivers/phy/rockchip/phy-rockchip-typec.ko", + "drivers/pwm/pwm-rockchip.ko", + "drivers/regulator/fan53555.ko", + "drivers/regulator/pwm-regulator.ko", + "drivers/regulator/rk808-regulator.ko", + "drivers/rtc/rtc-rk808.ko", + "drivers/soc/rockchip/io-domain.ko", + "drivers/thermal/rockchip_thermal.ko", + "drivers/usb/host/ohci-hcd.ko", + "drivers/usb/host/ohci-platform.ko", + "drivers/virtio/virtio_pci.ko", + "drivers/virtio/virtio_pci_legacy_dev.ko", + "drivers/virtio/virtio_pci_modern_dev.ko", + "drivers/watchdog/dw_wdt.ko", + "net/core/failover.ko", + ], +) + +kernel_modules_install( + name = "rockpi4_modules_install", + kernel_build = "//common:rockpi4", +) + +kernel_images( + name = "rockpi4_images", + build_initramfs = True, + kernel_build = "//common:rockpi4", + kernel_modules_install = "//common:rockpi4_modules_install", +) + +copy_to_dist_dir( + name = "rockpi4_dist", + data = [ + ":rockpi4", + ":rockpi4_images", + ":rockpi4_modules_install", + ], + dist_dir = "out/rockpi4/dist", + flat = True, +) + # DDK Headers # All headers. These are the public targets for DDK modules to use. alias( From 0a53ec9384a0fe552dea17c706812ebde4d78567 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Wed, 16 Nov 2022 19:39:38 +0000 Subject: [PATCH 360/963] ANDROID: GKI: Convert 6LoWPAN Support as module Converts IPv6 6LoWPAN as GKI modules. CONFIG_6LOWPAN: 6LoWPAN Support IPv6 over Low power Wireless Personal Area Network - "6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks. CONFIG_IEEE802154_6LOWPAN: 6lowpan support over IEEE 802.15.4 Bug: 232431151 Test: TH Change-Id: I4257cedfd499e1e01faba966e17c3d9dbf4dcbb7 Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 9 +++++++++ arch/arm64/configs/gki_defconfig | 4 ++-- arch/x86/configs/gki_defconfig | 4 ++-- modules.bzl | 9 +++++++++ 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 2a3cd4a42882..6386038d05ac 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -10,6 +10,14 @@ drivers/usb/class/cdc-acm.ko drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko mm/zsmalloc.ko +net/6lowpan/6lowpan.ko +net/6lowpan/nhc_dest.ko +net/6lowpan/nhc_fragment.ko +net/6lowpan/nhc_hop.ko +net/6lowpan/nhc_ipv6.ko +net/6lowpan/nhc_mobility.ko +net/6lowpan/nhc_routing.ko +net/6lowpan/nhc_udp.ko net/8021q/8021q.ko net/bluetooth/bluetooth.ko net/bluetooth/hidp/hidp.ko @@ -17,6 +25,7 @@ net/bluetooth/rfcomm/rfcomm.ko net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko +net/ieee802154/6lowpan/ieee802154_6lowpan.ko net/mac80211/mac80211.ko net/nfc/nfc.ko net/rfkill/rfkill.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 569e0b7f358c..4e324a80ab8f 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -216,9 +216,9 @@ CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m -CONFIG_6LOWPAN=y +CONFIG_6LOWPAN=m CONFIG_IEEE802154=y -CONFIG_IEEE802154_6LOWPAN=y +CONFIG_IEEE802154_6LOWPAN=m CONFIG_MAC802154=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 8dc799d20796..c6ee8801900f 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -206,9 +206,9 @@ CONFIG_TIPC=m CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m -CONFIG_6LOWPAN=y +CONFIG_6LOWPAN=m CONFIG_IEEE802154=y -CONFIG_IEEE802154_6LOWPAN=y +CONFIG_IEEE802154_6LOWPAN=m CONFIG_MAC802154=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y diff --git a/modules.bzl b/modules.bzl index a5a8dacdb5fd..f277893f5aed 100644 --- a/modules.bzl +++ b/modules.bzl @@ -12,6 +12,14 @@ COMMON_GKI_MODULES_LIST = [ "drivers/usb/serial/ftdi_sio.ko", "drivers/usb/serial/usbserial.ko", "mm/zsmalloc.ko", + "net/6lowpan/6lowpan.ko", + "net/6lowpan/nhc_dest.ko", + "net/6lowpan/nhc_fragment.ko", + "net/6lowpan/nhc_hop.ko", + "net/6lowpan/nhc_ipv6.ko", + "net/6lowpan/nhc_mobility.ko", + "net/6lowpan/nhc_routing.ko", + "net/6lowpan/nhc_udp.ko", "net/8021q/8021q.ko", "net/bluetooth/bluetooth.ko", "net/bluetooth/hidp/hidp.ko", @@ -19,6 +27,7 @@ COMMON_GKI_MODULES_LIST = [ "net/can/can-bcm.ko", "net/can/can-gw.ko", "net/can/can-raw.ko", + "net/ieee802154/6lowpan/ieee802154_6lowpan.ko", "net/mac80211/mac80211.ko", "net/nfc/nfc.ko", "net/rfkill/rfkill.ko", From 42fb0a1e84ff525ebe560e2baf9451ab69127e2b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 20 Oct 2022 23:14:27 -0400 Subject: [PATCH 361/963] tracing/ring-buffer: Have polling block on watermark Currently the way polling works on the ring buffer is broken. It will return immediately if there's any data in the ring buffer whereas a read will block until the watermark (defined by the tracefs buffer_percent file) is hit. That is, a select() or poll() will return as if there's data available, but then the following read will block. This is broken for the way select()s and poll()s are supposed to work. Have the polling on the ring buffer also block the same way reads and splice does on the ring buffer. Link: https://lkml.kernel.org/r/20221020231427.41be3f26@gandalf.local.home Cc: Linux Trace Kernel Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Primiano Tucci Cc: stable@vger.kernel.org Fixes: 1e0d6714aceb7 ("ring-buffer: Do not wake up a splice waiter when page is not full") Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 55 ++++++++++++++++++++++++------------- kernel/trace/trace.c | 2 +- 3 files changed, 38 insertions(+), 21 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 2504df9a0453..3c7d295746f6 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full); __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table); + struct file *filp, poll_table *poll_table, int full); void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu); #define RING_BUFFER_ALL_CPUS -1 diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9712083832f4..089b1ec9cb3b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -907,6 +907,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) return cnt - read; } +static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + size_t nr_pages; + size_t dirty; + + nr_pages = cpu_buffer->nr_pages; + if (!nr_pages || !full) + return true; + + dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + + return (dirty * 100) > (full * nr_pages); +} + /* * rb_wake_up_waiters - wake up tasks waiting for ring buffer input * @@ -1046,22 +1061,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) !ring_buffer_empty_cpu(buffer, cpu)) { unsigned long flags; bool pagebusy; - size_t nr_pages; - size_t dirty; + bool done; if (!full) break; raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu); + done = !pagebusy && full_hit(buffer, cpu, full); + if (!cpu_buffer->shortest_full || cpu_buffer->shortest_full > full) cpu_buffer->shortest_full = full; raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (!pagebusy && - (!nr_pages || (dirty * 100) > full * nr_pages)) + if (done) break; } @@ -1087,6 +1100,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * @cpu: the cpu buffer to wait on * @filp: the file descriptor * @poll_table: The poll descriptor + * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise @@ -1096,14 +1110,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full) * zero otherwise. */ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, - struct file *filp, poll_table *poll_table) + struct file *filp, poll_table *poll_table, int full) { struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if (cpu == RING_BUFFER_ALL_CPUS) + if (cpu == RING_BUFFER_ALL_CPUS) { work = &buffer->irq_work; - else { + full = 0; + } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -EINVAL; @@ -1111,8 +1126,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, work = &cpu_buffer->irq_work; } - poll_wait(filp, &work->waiters, poll_table); - work->waiters_pending = true; + if (full) { + poll_wait(filp, &work->full_waiters, poll_table); + work->full_waiters_pending = true; + } else { + poll_wait(filp, &work->waiters, poll_table); + work->waiters_pending = true; + } + /* * There's a tight race between setting the waiters_pending and * checking if the ring buffer is empty. Once the waiters_pending bit @@ -1128,6 +1149,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu, */ smp_mb(); + if (full) + return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0; + if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) return EPOLLIN | EPOLLRDNORM; @@ -3155,10 +3179,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { - size_t nr_pages; - size_t dirty; - size_t full; - if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ @@ -3182,10 +3202,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched); - full = cpu_buffer->shortest_full; - nr_pages = cpu_buffer->nr_pages; - dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu); - if (full && nr_pages && (dirty * 100) <= full * nr_pages) + if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full)) return; cpu_buffer->irq_work.wakeup_full = true; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 47a44b055a1d..c6c7a0af3ed2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6681,7 +6681,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl return EPOLLIN | EPOLLRDNORM; else return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file, - filp, poll_table); + filp, poll_table, iter->tr->buffer_percent); } static __poll_t From c964d62f5cab7b43dd0534f22a96eab386c6ec5d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:44:57 -0800 Subject: [PATCH 362/963] block: make dma_alignment a stacking queue_limit Device mappers had always been getting the default 511 dma mask, but the underlying device might have a larger alignment requirement. Since this value is used to determine alloweable direct-io alignment, this needs to be a stackable limit. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221110184501.2451620-2-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-settings.c | 8 +++++--- include/linux/blkdev.h | 15 ++++++++------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 17667159482e..5487912befe8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -425,7 +425,6 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) goto fail_stats; - blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); q->nr_requests = BLKDEV_DEFAULT_RQ; diff --git a/block/blk-settings.c b/block/blk-settings.c index 8bb9eef5310e..4949ed3ce7c9 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -57,6 +57,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->dma_alignment = 511; } EXPORT_SYMBOL(blk_set_default_limits); @@ -600,6 +601,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->io_min = max(t->io_min, b->io_min); t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); + t->dma_alignment = max(t->dma_alignment, b->dma_alignment); /* Set non-power-of-2 compatible chunk_sectors boundary */ if (b->chunk_sectors) @@ -773,7 +775,7 @@ EXPORT_SYMBOL(blk_queue_virt_boundary); **/ void blk_queue_dma_alignment(struct request_queue *q, int mask) { - q->dma_alignment = mask; + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_dma_alignment); @@ -795,8 +797,8 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) { BUG_ON(mask > PAGE_SIZE); - if (mask > q->dma_alignment) - q->dma_alignment = mask; + if (mask > q->limits.dma_alignment) + q->limits.dma_alignment = mask; } EXPORT_SYMBOL(blk_queue_update_dma_alignment); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..9898e34b2c2d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -311,6 +311,13 @@ struct queue_limits { unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; + + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ + unsigned int dma_alignment; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -456,12 +463,6 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; - /* - * Drivers that set dma_alignment to less than 511 must be prepared to - * handle individual bvec's that are not a multiple of a SECTOR_SIZE - * due to possible offsets. - */ - unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct blk_crypto_profile *crypto_profile; @@ -1324,7 +1325,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->dma_alignment : 511; + return q ? q->limits.dma_alignment : 511; } static inline unsigned int bdev_dma_alignment(struct block_device *bdev) From 86e4d3e8d1838ca88fb9267e669c36f6c8f7c6cd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:44:58 -0800 Subject: [PATCH 363/963] dm-crypt: provide dma_alignment limit in io_hints This device mapper needs bio vectors to be sized and memory aligned to the logical block size. Set the minimum required queue limit accordingly. Link: https://lore.kernel.org/linux-block/20221101001558.648ee024@xps.demsh.org/ Fixes: b1a000d3b8ec5 ("block: relax direct io memory alignment") Reportred-by: Eric Biggers Reported-by: Dmitrii Tcvetkov Signed-off-by: Keith Busch Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221110184501.2451620-3-kbusch@meta.com Signed-off-by: Jens Axboe --- drivers/md/dm-crypt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 159c6806c19b..2653516bcdef 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3630,6 +3630,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->physical_block_size = max_t(unsigned, limits->physical_block_size, cc->sector_size); limits->io_min = max_t(unsigned, limits->io_min, cc->sector_size); + limits->dma_alignment = limits->logical_block_size - 1; } static struct target_type crypt_target = { From b3228254bb6e91e57f920227f72a1a7d81925d81 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:44:59 -0800 Subject: [PATCH 364/963] block: make blk_set_default_limits() private There are no external users of this function. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221110184501.2451620-4-kbusch@meta.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 - block/blk.h | 1 + include/linux/blkdev.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 4949ed3ce7c9..8ac1038d0c79 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -59,7 +59,6 @@ void blk_set_default_limits(struct queue_limits *lim) lim->zone_write_granularity = 0; lim->dma_alignment = 511; } -EXPORT_SYMBOL(blk_set_default_limits); /** * blk_set_stacking_limits - set default limits for stacking devices diff --git a/block/blk.h b/block/blk.h index 5350bf363035..a0c1d31a8fe7 100644 --- a/block/blk.h +++ b/block/blk.h @@ -324,6 +324,7 @@ void blk_rq_set_mixed_merge(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); +void blk_set_default_limits(struct queue_limits *lim); int blk_dev_init(void); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9898e34b2c2d..891f8cbcd043 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -945,7 +945,6 @@ extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); From 29aa778bb66795e6a78b1c99beadc83887827868 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:45:00 -0800 Subject: [PATCH 365/963] dm-integrity: set dma_alignment limit in io_hints This device mapper needs bio vectors to be sized and memory aligned to the logical block size. Set the minimum required queue limit accordingly. Signed-off-by: Keith Busch Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221110184501.2451620-5-kbusch@meta.com Signed-off-by: Jens Axboe --- drivers/md/dm-integrity.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aaf2472df6e5..e1e7b205573f 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3370,6 +3370,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT; limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); + limits->dma_alignment = limits->logical_block_size - 1; } } From 50a893359cd2643ee1afc96eedc9e7084cab49fa Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Nov 2022 10:45:01 -0800 Subject: [PATCH 366/963] dm-log-writes: set dma_alignment limit in io_hints This device mapper needs bio vectors to be sized and memory aligned to the logical block size. Set the minimum required queue limit accordingly. Signed-off-by: Keith Busch Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20221110184501.2451620-6-kbusch@meta.com Signed-off-by: Jens Axboe --- drivers/md/dm-log-writes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 20fd688f72e7..178e13a5b059 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -875,6 +875,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); limits->io_min = limits->physical_block_size; + limits->dma_alignment = limits->logical_block_size - 1; } #if IS_ENABLED(CONFIG_FS_DAX) From 51fdad54ab85049e33b8e7d916b83b086da32380 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Wed, 16 Nov 2022 22:11:44 +0000 Subject: [PATCH 367/963] ANDROID: GKI: Convert CAN Bus Subsystem as module Sets CONFIG_CAN=m to conver CAN Bus subsystem as GKI module. Bug: 232431151 Test: TH Change-Id: Ia90fcc7a30edb994a8e47dea617050836356729d Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 1 + arch/arm64/configs/gki_defconfig | 6 +----- arch/x86/configs/gki_defconfig | 6 +----- modules.bzl | 1 + 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 6386038d05ac..a86027a2a369 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -22,6 +22,7 @@ net/8021q/8021q.ko net/bluetooth/bluetooth.ko net/bluetooth/hidp/hidp.ko net/bluetooth/rfcomm/rfcomm.ko +net/can/can.ko net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 4e324a80ab8f..f2d0bad7dda4 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -253,10 +253,7 @@ CONFIG_NET_ACT_SKBEDIT=y CONFIG_NET_ACT_BPF=y CONFIG_VSOCKETS=y CONFIG_CGROUP_NET_PRIO=y -CONFIG_CAN=y -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m +CONFIG_CAN=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y @@ -317,7 +314,6 @@ CONFIG_IFB=y CONFIG_MACSEC=y CONFIG_TUN=y CONFIG_VETH=y -CONFIG_CAN_DEV=m CONFIG_CAN_VCAN=m CONFIG_CAN_SLCAN=m CONFIG_PPP=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index c6ee8801900f..12aae44995f6 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -243,10 +243,7 @@ CONFIG_NET_ACT_SKBEDIT=y CONFIG_NET_ACT_BPF=y CONFIG_VSOCKETS=y CONFIG_CGROUP_NET_PRIO=y -CONFIG_CAN=y -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_GW=m +CONFIG_CAN=m CONFIG_BT=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y @@ -298,7 +295,6 @@ CONFIG_IFB=y CONFIG_MACSEC=y CONFIG_TUN=y CONFIG_VETH=y -CONFIG_CAN_DEV=m CONFIG_CAN_VCAN=m CONFIG_CAN_SLCAN=m CONFIG_PPP=y diff --git a/modules.bzl b/modules.bzl index f277893f5aed..b69bea33e48b 100644 --- a/modules.bzl +++ b/modules.bzl @@ -24,6 +24,7 @@ COMMON_GKI_MODULES_LIST = [ "net/bluetooth/bluetooth.ko", "net/bluetooth/hidp/hidp.ko", "net/bluetooth/rfcomm/rfcomm.ko", + "net/can/can.ko", "net/can/can-bcm.ko", "net/can/can-gw.ko", "net/can/can-raw.ko", From 31029a8b2c7e656a0289194ef16415050ae4c4ac Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 21 Oct 2022 12:30:13 -0400 Subject: [PATCH 368/963] ring-buffer: Include dropped pages in counting dirty patches The function ring_buffer_nr_dirty_pages() was created to find out how many pages are filled in the ring buffer. There's two running counters. One is incremented whenever a new page is touched (pages_touched) and the other is whenever a page is read (pages_read). The dirty count is the number touched minus the number read. This is used to determine if a blocked task should be woken up if the percentage of the ring buffer it is waiting for is hit. The problem is that it does not take into account dropped pages (when the new writes overwrite pages that were not read). And then the dirty pages will always be greater than the percentage. This makes the "buffer_percent" file inaccurate, as the number of dirty pages end up always being larger than the percentage, event when it's not and this causes user space to be woken up more than it wants to be. Add a new counter to keep track of lost pages, and include that in the accounting of dirty pages so that it is actually accurate. Link: https://lkml.kernel.org/r/20221021123013.55fb6055@gandalf.local.home Fixes: 2c2b0a78b3739 ("ring-buffer: Add percentage of ring buffer full to wake up reader") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 089b1ec9cb3b..a19369c4d8df 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -519,6 +519,7 @@ struct ring_buffer_per_cpu { local_t committing; local_t commits; local_t pages_touched; + local_t pages_lost; local_t pages_read; long last_pages_touch; size_t shortest_full; @@ -894,10 +895,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu) { size_t read; + size_t lost; size_t cnt; read = local_read(&buffer->buffers[cpu]->pages_read); + lost = local_read(&buffer->buffers[cpu]->pages_lost); cnt = local_read(&buffer->buffers[cpu]->pages_touched); + + if (WARN_ON_ONCE(cnt < lost)) + return 0; + + cnt -= lost; + /* The reader can read an empty page, but not more than that */ if (cnt < read) { WARN_ON_ONCE(read > cnt + 1); @@ -2031,6 +2040,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) */ local_add(page_entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); } /* @@ -2515,6 +2525,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, */ local_add(entries, &cpu_buffer->overrun); local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); + local_inc(&cpu_buffer->pages_lost); /* * The entries will be zeroed out when we move the @@ -5265,6 +5276,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); local_set(&cpu_buffer->pages_touched, 0); + local_set(&cpu_buffer->pages_lost, 0); local_set(&cpu_buffer->pages_read, 0); cpu_buffer->last_pages_touch = 0; cpu_buffer->shortest_full = 0; From 8e514b4fe64731841b4cbf612d202a1b193b9eeb Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Wed, 16 Nov 2022 23:45:57 +0000 Subject: [PATCH 369/963] ANDROID: GKI: Convert 802.15.4 support as module Converts IEEE 802.15.4 Protocol, Socket & MAC drivers as GKI modules. CONFIG_IEEE802154: IEEE Std 802.15.4 Low-Rate Wireless PANs support CONFIG_IEEE802154_SOCKET: IEEE 802.15.4 socket interface CONFIG_MAC802154: Generic IEEE 802.15.4 Soft Networking Stack CONFIG_MAC802154=m makes following configs visible in savedefconfig: CONFIG_CRC_CCITT & CONFIG_CRYPTO_CCM Bug: 232431151 Test: TH Change-Id: Iec93faf73e2955054474e482c1d25ad3df822659 Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 3 +++ arch/arm64/configs/gki_defconfig | 6 ++++-- arch/x86/configs/gki_defconfig | 6 ++++-- modules.bzl | 3 +++ 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index a86027a2a369..02afcf7110ee 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -27,7 +27,10 @@ net/can/can-bcm.ko net/can/can-gw.ko net/can/can-raw.ko net/ieee802154/6lowpan/ieee802154_6lowpan.ko +net/ieee802154/ieee802154.ko +net/ieee802154/ieee802154_socket.ko net/mac80211/mac80211.ko +net/mac802154/mac802154.ko net/nfc/nfc.ko net/rfkill/rfkill.ko net/tipc/diag.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index f2d0bad7dda4..235ba1b38318 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -217,9 +217,9 @@ CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=m -CONFIG_IEEE802154=y +CONFIG_IEEE802154=m CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=y +CONFIG_MAC802154=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y CONFIG_NET_SCH_PRIO=y @@ -639,6 +639,7 @@ CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_CMAC=y CONFIG_CRYPTO_MD5=y @@ -652,6 +653,7 @@ CONFIG_CRYPTO_SHA512_ARM64_CE=y CONFIG_CRYPTO_POLYVAL_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_TRACE_MMIO_ACCESS=y +CONFIG_CRC_CCITT=y CONFIG_XZ_DEC=y CONFIG_DMA_CMA=y CONFIG_PRINTK_TIME=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 12aae44995f6..ae138f4a7a5a 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -207,9 +207,9 @@ CONFIG_L2TP=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=m -CONFIG_IEEE802154=y +CONFIG_IEEE802154=m CONFIG_IEEE802154_6LOWPAN=m -CONFIG_MAC802154=y +CONFIG_MAC802154=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_HTB=y CONFIG_NET_SCH_PRIO=y @@ -582,6 +582,7 @@ CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_HCTR2=y CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_CCM=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_CMAC=y CONFIG_CRYPTO_MD5=y @@ -594,6 +595,7 @@ CONFIG_CRYPTO_AES_NI_INTEL=y CONFIG_CRYPTO_POLYVAL_CLMUL_NI=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y +CONFIG_CRC_CCITT=y CONFIG_CRC8=y CONFIG_XZ_DEC=y CONFIG_DMA_CMA=y diff --git a/modules.bzl b/modules.bzl index b69bea33e48b..9839a6c45719 100644 --- a/modules.bzl +++ b/modules.bzl @@ -29,7 +29,10 @@ COMMON_GKI_MODULES_LIST = [ "net/can/can-gw.ko", "net/can/can-raw.ko", "net/ieee802154/6lowpan/ieee802154_6lowpan.ko", + "net/ieee802154/ieee802154.ko", + "net/ieee802154/ieee802154_socket.ko", "net/mac80211/mac80211.ko", + "net/mac802154/mac802154.ko", "net/nfc/nfc.ko", "net/rfkill/rfkill.ko", "net/tipc/diag.ko", From fbf5679ace6b0853d67aea83621dcccaa738e5f8 Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Thu, 17 Nov 2022 01:12:56 +0000 Subject: [PATCH 370/963] ANDROID: GKI: Convert L2TP as modules Converts L2TP and PPP over L2TP as GKI modules. CONFIG_L2TP: Layer Two Tunneling Protocol (L2TP) CONFIG_PPOL2TP: PPP over L2TP Makes CONFIG_NETFILTER_XT_MATCH_L2TP visible in the savedefconfig. Bug: 232431151 Test: TH Change-Id: I0e1cbd8f04f97f172f8b1551eade21cf303b24ed Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 2 ++ arch/arm64/configs/gki_defconfig | 5 +++-- arch/x86/configs/gki_defconfig | 5 +++-- modules.bzl | 2 ++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 02afcf7110ee..018fd4fda860 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -29,6 +29,8 @@ net/can/can-raw.ko net/ieee802154/6lowpan/ieee802154_6lowpan.ko net/ieee802154/ieee802154.ko net/ieee802154/ieee802154_socket.ko +net/l2tp/l2tp_core.ko +net/l2tp/l2tp_ppp.ko net/mac80211/mac80211.ko net/mac802154/mac802154.ko net/nfc/nfc.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 235ba1b38318..63f167a17cf6 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -174,6 +174,7 @@ CONFIG_NETFILTER_XT_MATCH_ESP=y CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y CONFIG_NETFILTER_XT_MATCH_HELPER=y CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_L2TP=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MATCH_LIMIT=y CONFIG_NETFILTER_XT_MATCH_MAC=y @@ -213,7 +214,7 @@ CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y CONFIG_TIPC=m -CONFIG_L2TP=y +CONFIG_L2TP=m CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=m @@ -321,7 +322,7 @@ CONFIG_PPP_BSDCOMP=y CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PPTP=y -CONFIG_PPPOL2TP=y +CONFIG_PPPOL2TP=m CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y CONFIG_USB_USBNET=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index ae138f4a7a5a..066dfb877351 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -164,6 +164,7 @@ CONFIG_NETFILTER_XT_MATCH_ESP=y CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=y CONFIG_NETFILTER_XT_MATCH_HELPER=y CONFIG_NETFILTER_XT_MATCH_IPRANGE=y +CONFIG_NETFILTER_XT_MATCH_L2TP=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MATCH_LIMIT=y CONFIG_NETFILTER_XT_MATCH_MAC=y @@ -203,7 +204,7 @@ CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_MANGLE=y CONFIG_IP6_NF_RAW=y CONFIG_TIPC=m -CONFIG_L2TP=y +CONFIG_L2TP=m CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=m CONFIG_6LOWPAN=m @@ -302,7 +303,7 @@ CONFIG_PPP_BSDCOMP=y CONFIG_PPP_DEFLATE=y CONFIG_PPP_MPPE=y CONFIG_PPTP=y -CONFIG_PPPOL2TP=y +CONFIG_PPPOL2TP=m CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y CONFIG_USB_USBNET=y diff --git a/modules.bzl b/modules.bzl index 9839a6c45719..48b07e066c7c 100644 --- a/modules.bzl +++ b/modules.bzl @@ -31,6 +31,8 @@ COMMON_GKI_MODULES_LIST = [ "net/ieee802154/6lowpan/ieee802154_6lowpan.ko", "net/ieee802154/ieee802154.ko", "net/ieee802154/ieee802154_socket.ko", + "net/l2tp/l2tp_core.ko", + "net/l2tp/l2tp_ppp.ko", "net/mac80211/mac80211.ko", "net/mac802154/mac802154.ko", "net/nfc/nfc.ko", From 649e72070cbbb8600eb823833e4748f5a0815116 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Mon, 7 Nov 2022 19:04:50 +0800 Subject: [PATCH 371/963] tracing: Fix memory leak in tracing_read_pipe() kmemleak reports this issue: unreferenced object 0xffff888105a18900 (size 128): comm "test_progs", pid 18933, jiffies 4336275356 (age 22801.766s) hex dump (first 32 bytes): 25 73 00 90 81 88 ff ff 26 05 00 00 42 01 58 04 %s......&...B.X. 03 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000560143a1>] __kmalloc_node_track_caller+0x4a/0x140 [<000000006af00822>] krealloc+0x8d/0xf0 [<00000000c309be6a>] trace_iter_expand_format+0x99/0x150 [<000000005a53bdb6>] trace_check_vprintf+0x1e0/0x11d0 [<0000000065629d9d>] trace_event_printf+0xb6/0xf0 [<000000009a690dc7>] trace_raw_output_bpf_trace_printk+0x89/0xc0 [<00000000d22db172>] print_trace_line+0x73c/0x1480 [<00000000cdba76ba>] tracing_read_pipe+0x45c/0x9f0 [<0000000015b58459>] vfs_read+0x17b/0x7c0 [<000000004aeee8ed>] ksys_read+0xed/0x1c0 [<0000000063d3d898>] do_syscall_64+0x3b/0x90 [<00000000a06dda7f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd iter->fmt alloced in tracing_read_pipe() -> .. ->trace_iter_expand_format(), but not freed, to fix, add free in tracing_release_pipe() Link: https://lkml.kernel.org/r/1667819090-4643-1-git-send-email-wangyufen@huawei.com Cc: stable@vger.kernel.org Fixes: efbbdaa22bb7 ("tracing: Show real address for trace event arguments") Acked-by: Masami Hiramatsu (Google) Signed-off-by: Wang Yufen Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c6c7a0af3ed2..5bd202d6d79a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6657,6 +6657,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); + kfree(iter->fmt); mutex_destroy(&iter->mutex); kfree(iter); From bedf06833b1f63c2627bd5634602e05592129d7a Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Mon, 7 Nov 2022 21:35:56 +0530 Subject: [PATCH 372/963] tracing: Fix warning on variable 'struct trace_array' Move the declaration of 'struct trace_array' out of #ifdef CONFIG_TRACING block, to fix the following warning when CONFIG_TRACING is not set: >> include/linux/trace.h:63:45: warning: 'struct trace_array' declared inside parameter list will not be visible outside of this definition or declaration Link: https://lkml.kernel.org/r/20221107160556.2139463-1-shraash@google.com Fixes: 1a77dd1c2bb5 ("scsi: tracing: Fix compile error in trace_array calls when TRACING is disabled") Cc: "Martin K. Petersen" Cc: Arun Easi Acked-by: Masami Hiramatsu (Google) Reviewed-by: Guenter Roeck Signed-off-by: Aashish Sharma Signed-off-by: Steven Rostedt (Google) --- include/linux/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/trace.h b/include/linux/trace.h index b5e16e438448..80ffda871749 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -26,13 +26,13 @@ struct trace_export { int flags; }; +struct trace_array; + #ifdef CONFIG_TRACING int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); -struct trace_array; - void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, From 1f23fcc683a347cb81303ef07b365b13660ddb4e Mon Sep 17 00:00:00 2001 From: Ramji Jiyani Date: Thu, 17 Nov 2022 06:04:13 +0000 Subject: [PATCH 373/963] ANDROID: GKI: Convert Net PPP Protocol as module Converts networking Point to Point Protocol drivers as GKI modules. CONFIG_PPP: PPP (point-to-point protocol) support CONFIG_PPP_BSDCOMP: PPP BSD-Compress compression CONFIG_PPP_DEFLATE: PPP Deflate compression CONFIG_PPP_MPPE: PPP MPPE compression (encryption); selects CRYPTO_LIB_ARC4=m CONFIG_PPTP: PPP over IPv4 (PPTP) Bug: 232431151 Test: TH Change-Id: Id6ef00a4cda0433d375554a965835d6d59d2d473 Signed-off-by: Ramji Jiyani --- android/gki_system_dlkm_modules | 8 ++++++++ arch/arm64/configs/gki_defconfig | 10 +++++----- arch/x86/configs/gki_defconfig | 10 +++++----- modules.bzl | 8 ++++++++ 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules index 018fd4fda860..c7901421909e 100644 --- a/android/gki_system_dlkm_modules +++ b/android/gki_system_dlkm_modules @@ -6,9 +6,17 @@ drivers/bluetooth/hci_uart.ko drivers/net/can/dev/can-dev.ko drivers/net/can/slcan/slcan.ko drivers/net/can/vcan.ko +drivers/net/ppp/bsd_comp.ko +drivers/net/ppp/ppp_deflate.ko +drivers/net/ppp/ppp_generic.ko +drivers/net/ppp/ppp_mppe.ko +drivers/net/ppp/pppox.ko +drivers/net/ppp/pptp.ko +drivers/net/slip/slhc.ko drivers/usb/class/cdc-acm.ko drivers/usb/serial/ftdi_sio.ko drivers/usb/serial/usbserial.ko +lib/crypto/libarc4.ko mm/zsmalloc.ko net/6lowpan/6lowpan.ko net/6lowpan/nhc_dest.ko diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 63f167a17cf6..ffe9362f1cf6 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -317,11 +317,11 @@ CONFIG_TUN=y CONFIG_VETH=y CONFIG_CAN_VCAN=m CONFIG_CAN_SLCAN=m -CONFIG_PPP=y -CONFIG_PPP_BSDCOMP=y -CONFIG_PPP_DEFLATE=y -CONFIG_PPP_MPPE=y -CONFIG_PPTP=y +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index 066dfb877351..f3d4d8a58d45 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -298,11 +298,11 @@ CONFIG_TUN=y CONFIG_VETH=y CONFIG_CAN_VCAN=m CONFIG_CAN_SLCAN=m -CONFIG_PPP=y -CONFIG_PPP_BSDCOMP=y -CONFIG_PPP_DEFLATE=y -CONFIG_PPP_MPPE=y -CONFIG_PPTP=y +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPTP=m CONFIG_PPPOL2TP=m CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y diff --git a/modules.bzl b/modules.bzl index 48b07e066c7c..bd436f3f93bc 100644 --- a/modules.bzl +++ b/modules.bzl @@ -8,9 +8,17 @@ COMMON_GKI_MODULES_LIST = [ "drivers/net/can/dev/can-dev.ko", "drivers/net/can/slcan/slcan.ko", "drivers/net/can/vcan.ko", + "drivers/net/ppp/bsd_comp.ko", + "drivers/net/ppp/ppp_deflate.ko", + "drivers/net/ppp/ppp_generic.ko", + "drivers/net/ppp/ppp_mppe.ko", + "drivers/net/ppp/pppox.ko", + "drivers/net/ppp/pptp.ko", + "drivers/net/slip/slhc.ko", "drivers/usb/class/cdc-acm.ko", "drivers/usb/serial/ftdi_sio.ko", "drivers/usb/serial/usbserial.ko", + "lib/crypto/libarc4.ko", "mm/zsmalloc.ko", "net/6lowpan/6lowpan.ko", "net/6lowpan/nhc_dest.ko", From cce616e012c215d65c15e5d1afa73182dea49389 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 9 Nov 2022 22:01:24 +0800 Subject: [PATCH 374/963] tee: optee: fix possible memory leak in optee_register_device() If device_register() returns error in optee_register_device(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and optee_device is freed in optee_release_device(). Fixes: c3fa24af9244 ("tee: optee: add TEE bus device enumeration support") Signed-off-by: Yang Yingliang Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/optee/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index f3947be13e2e..64f0e047c23d 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -80,7 +80,7 @@ static int optee_register_device(const uuid_t *device_uuid) rc = device_register(&optee_device->dev); if (rc) { pr_err("device registration failed, err: %d\n", rc); - kfree(optee_device); + put_device(&optee_device->dev); } return rc; From 5db8face97f81c9342458c052572e19ac6bd8e52 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 15 Nov 2022 22:04:53 +0000 Subject: [PATCH 375/963] kbuild: Restore .version auto-increment behaviour for Debian packages Since 2df8220cc511 ("kbuild: build init/built-in.a just once"), generating Debian packages using 'make bindeb-pkg' results in packages that are stuck to the same .version, leading to unexpected behaviours (multiple packages with the same version). That's because the mkdebian script samples the build version before building the kernel, and forces the use of that version number for the actual build. Restore the previous behaviour by calling init/build-version instead of reading the .version file. This is likely to result in too many .version bumps, but this is what was happening before (although the bump was affecting builds made after the current one). Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once") Signed-off-by: Marc Zyngier Signed-off-by: Masahiro Yamada --- scripts/package/mkdebian | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian index 60a2a63a5e90..a3ac5a716e9f 100755 --- a/scripts/package/mkdebian +++ b/scripts/package/mkdebian @@ -90,7 +90,7 @@ if [ -n "$KDEB_PKGVERSION" ]; then packageversion=$KDEB_PKGVERSION revision=${packageversion##*-} else - revision=$(cat .version 2>/dev/null||echo 1) + revision=$($srctree/init/build-version) packageversion=$version-$revision fi sourcename=$KDEB_SOURCENAME From e103ba33998d0f25653cc8ebe745b68d1ee10cda Mon Sep 17 00:00:00 2001 From: Enrico Sau Date: Tue, 15 Nov 2022 11:58:59 +0100 Subject: [PATCH 376/963] net: usb: qmi_wwan: add Telit 0x103a composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following Telit LE910C4-WWX composition: 0x103a: rmnet Signed-off-by: Enrico Sau Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20221115105859.14324-1-enrico.sau@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 26c34a7c21bd..afd6faa4c2ec 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1357,6 +1357,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ + {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ From 809ff97a677ff85dfb7ce2b63be261d1633dcf29 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Tue, 15 Nov 2022 13:44:34 +0200 Subject: [PATCH 377/963] net: usb: smsc95xx: fix external PHY reset An external PHY needs settling time after power up or reset. In the bind() function an mdio bus is registered. If at this point the external PHY is still initialising, no valid PHY ID will be read and on phy_find_first() the bind() function will fail. If an external PHY is present, wait the maximum time specified in 802.3 45.2.7.1.1. Fixes: 05b35e7eb9a1 ("smsc95xx: add phylib support") Signed-off-by: Alexandru Tachici Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221115114434.9991-2-alexandru.tachici@analog.com Signed-off-by: Paolo Abeni --- drivers/net/usb/smsc95xx.c | 46 ++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bfb58c91db04..32d2c60d334d 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -66,6 +66,7 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + bool is_internal_phy; struct irq_chip irqchip; struct irq_domain *irqdomain; struct fwnode_handle *irqfwnode; @@ -252,6 +253,43 @@ static void smsc95xx_mdio_write(struct usbnet *dev, int phy_id, int idx, mutex_unlock(&dev->phy_mutex); } +static int smsc95xx_mdiobus_reset(struct mii_bus *bus) +{ + struct smsc95xx_priv *pdata; + struct usbnet *dev; + u32 val; + int ret; + + dev = bus->priv; + pdata = dev->driver_priv; + + if (pdata->is_internal_phy) + return 0; + + mutex_lock(&dev->phy_mutex); + + ret = smsc95xx_read_reg(dev, PM_CTRL, &val); + if (ret < 0) + goto reset_out; + + val |= PM_CTL_PHY_RST_; + + ret = smsc95xx_write_reg(dev, PM_CTRL, val); + if (ret < 0) + goto reset_out; + + /* Driver has no knowledge at this point about the external PHY. + * The 802.3 specifies that the reset process shall + * be completed within 0.5 s. + */ + fsleep(500000); + +reset_out: + mutex_unlock(&dev->phy_mutex); + + return 0; +} + static int smsc95xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) { struct usbnet *dev = bus->priv; @@ -1052,7 +1090,6 @@ static void smsc95xx_handle_link_change(struct net_device *net) static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) { struct smsc95xx_priv *pdata; - bool is_internal_phy; char usb_path[64]; int ret, phy_irq; u32 val; @@ -1133,13 +1170,14 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) if (ret < 0) goto free_mdio; - is_internal_phy = !(val & HW_CFG_PSEL_); - if (is_internal_phy) + pdata->is_internal_phy = !(val & HW_CFG_PSEL_); + if (pdata->is_internal_phy) pdata->mdiobus->phy_mask = ~(1u << SMSC95XX_INTERNAL_PHY_ID); pdata->mdiobus->priv = dev; pdata->mdiobus->read = smsc95xx_mdiobus_read; pdata->mdiobus->write = smsc95xx_mdiobus_write; + pdata->mdiobus->reset = smsc95xx_mdiobus_reset; pdata->mdiobus->name = "smsc95xx-mdiobus"; pdata->mdiobus->parent = &dev->udev->dev; @@ -1160,7 +1198,7 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) } pdata->phydev->irq = phy_irq; - pdata->phydev->is_internal = is_internal_phy; + pdata->phydev->is_internal = pdata->is_internal_phy; /* detect device revision as different features may be available */ ret = smsc95xx_read_reg(dev, ID_REV, &val); From 58e0be1ef6118c5352b56a4d06e974c5599993a5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 15 Nov 2022 22:24:00 +0800 Subject: [PATCH 378/963] net: use struct_group to copy ip/ipv6 header addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel test robot reported warnings when build bonding module with make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/net/bonding/: from ../drivers/net/bonding/bond_main.c:35: In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v4addrs’ at ../include/net/ip.h:566:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3984:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In function ‘fortify_memcpy_chk’, inlined from ‘iph_to_flow_copy_v6addrs’ at ../include/net/ipv6.h:900:2, inlined from ‘bond_flow_ip’ at ../drivers/net/bonding/bond_main.c:3994:3: ../include/linux/fortify-string.h:413:25: warning: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of f ield (2nd parameter); maybe use struct_group()? [-Wattribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is because we try to copy the whole ip/ip6 address to the flow_key, while we only point the to ip/ip6 saddr. Note that since these are UAPI headers, __struct_group() is used to avoid the compiler warnings. Reported-by: kernel test robot Fixes: c3f8324188fa ("net: Add full IPv6 addresses to flow_keys") Signed-off-by: Hangbin Liu Link: https://lore.kernel.org/r/20221115142400.1204786-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- include/net/ip.h | 2 +- include/net/ipv6.h | 2 +- include/uapi/linux/ip.h | 6 ++++-- include/uapi/linux/ipv6.h | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 038097c2a152..144bdfbb25af 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -563,7 +563,7 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) != offsetof(typeof(flow->addrs), v4addrs.src) + sizeof(flow->addrs.v4addrs.src)); - memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs)); + memcpy(&flow->addrs.v4addrs, &iph->addrs, sizeof(flow->addrs.v4addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 37943ba3a73c..d383c895592a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -897,7 +897,7 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow, BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) != offsetof(typeof(flow->addrs), v6addrs.src) + sizeof(flow->addrs.v6addrs.src)); - memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs)); + memcpy(&flow->addrs.v6addrs, &iph->addrs, sizeof(flow->addrs.v6addrs)); flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 961ec16a26b8..874a92349bf5 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -100,8 +100,10 @@ struct iphdr { __u8 ttl; __u8 protocol; __sum16 check; - __be32 saddr; - __be32 daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + __be32 saddr; + __be32 daddr; + ); /*The options start here. */ }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 03cdbe798fe3..81f4243bebb1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -130,8 +130,10 @@ struct ipv6hdr { __u8 nexthdr; __u8 hop_limit; - struct in6_addr saddr; - struct in6_addr daddr; + __struct_group(/* no tag */, addrs, /* no attrs */, + struct in6_addr saddr; + struct in6_addr daddr; + ); }; From 2197aa6b0aa236b9896a09b9d08d6924d18b84f6 Mon Sep 17 00:00:00 2001 From: Krishna Yarlagadda Date: Thu, 17 Nov 2022 12:33:20 +0530 Subject: [PATCH 379/963] spi: tegra210-quad: Fix duplicate resource error controller data alloc is done with client device data causing duplicate resource error. Allocate memory using controller device when using devm Fixes: f89d2cc3967a ("spi: tegra210-quad: use devm call for cdata memory") Signed-off-by: Krishna Yarlagadda Reviewed-by: Jon Hunter Tested-by: Jon Hunter Link: https://lore.kernel.org/r/20221117070320.18720-1-kyarlagadda@nvidia.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 10f0c5a6e0dc..9f356612ba7e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -924,8 +924,9 @@ static int tegra_qspi_start_transfer_one(struct spi_device *spi, static struct tegra_qspi_client_data *tegra_qspi_parse_cdata_dt(struct spi_device *spi) { struct tegra_qspi_client_data *cdata; + struct tegra_qspi *tqspi = spi_master_get_devdata(spi->master); - cdata = devm_kzalloc(&spi->dev, sizeof(*cdata), GFP_KERNEL); + cdata = devm_kzalloc(tqspi->dev, sizeof(*cdata), GFP_KERNEL); if (!cdata) return NULL; From 40a2226e8bfacb79dd154dea68febeead9d847e9 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 14 Nov 2022 19:59:23 +0100 Subject: [PATCH 380/963] ARM: dts: at91: sam9g20ek: enable udc vbus gpio pinctrl We set the PIOC to GPIO mode. This way the pin becomes an input signal will be usable by the controller. Without this change the udc on the 9g20ek does not work. Cc: nicolas.ferre@microchip.com Cc: ludovic.desroches@microchip.com Cc: alexandre.belloni@bootlin.com Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@pengutronix.de Fixes: 5cb4e73575e3 ("ARM: at91: add at91sam9g20ek boards dt support") Signed-off-by: Michael Grzeschik Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221114185923.1023249-3-m.grzeschik@pengutronix.de --- arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index 60d61291f344..024af2db638e 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -39,6 +39,13 @@ pinctrl_pck0_as_mck: pck0_as_mck { }; + usb1 { + pinctrl_usb1_vbus_gpio: usb1_vbus_gpio { + atmel,pins = + ; /* PC5 GPIO */ + }; + }; + mmc0_slot1 { pinctrl_board_mmc0_slot1: mmc0_slot1-board { atmel,pins = @@ -84,6 +91,8 @@ macb0: ethernet@fffc4000 { }; usb1: gadget@fffa4000 { + pinctrl-0 = <&pinctrl_usb1_vbus_gpio>; + pinctrl-names = "default"; atmel,vbus-gpio = <&pioC 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; From 57976762428675f259339385d3324d28ee53ec02 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 14 Nov 2022 19:59:22 +0100 Subject: [PATCH 381/963] ARM: at91: rm9200: fix usb device clock id Referring to the datasheet the index 2 is the MCKUDP. When enabled, it "Enables the automatic disable of the Master Clock of the USB Device Port when a suspend condition occurs". We fix the index to the real UDP id which "Enables the 48 MHz clock of the USB Device Port". Cc: nicolas.ferre@microchip.com Cc: ludovic.desroches@microchip.com Cc: alexandre.belloni@bootlin.com Cc: mturquette@baylibre.com Cc: sboyd@kernel.org Cc: claudiu.beznea@microchip.com Cc: linux-clk@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@pengutronix.de Fixes: 02ff48e4d7f7 ("clk: at91: add at91rm9200 pmc driver") Fixes: 0e0e528d8260 ("ARM: dts: at91: rm9200: switch to new clock bindings") Reviewed-by: Claudiu Beznea Signed-off-by: Michael Grzeschik Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221114185923.1023249-2-m.grzeschik@pengutronix.de --- arch/arm/boot/dts/at91rm9200.dtsi | 2 +- drivers/clk/at91/at91rm9200.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ usb1: gadget@fffb0000 { compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index b174f727a8ef..16870943a13e 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, From 97b6847ac10ba753849207e20df74a7c0ea03343 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 16 Nov 2022 16:16:55 -0800 Subject: [PATCH 382/963] KVM: Cap vcpu->halt_poll_ns before halting rather than after Cap vcpu->halt_poll_ns based on the max halt polling time just before halting, rather than after the last halt. This arguably provides better accuracy if an admin disables halt polling in between halts, although the improvement is nominal. A side-effect of this change is that grow_halt_poll_ns() no longer needs to access vcpu->kvm->max_halt_poll_ns, which will be useful in a future commit where the max halt polling time can come from the module parameter halt_poll_ns instead. Signed-off-by: David Matlack Message-Id: <20221117001657.1067231-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 25d7872b29c1..2a7285c14013 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3377,9 +3377,6 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > vcpu->kvm->max_halt_poll_ns) - val = vcpu->kvm->max_halt_poll_ns; - vcpu->halt_poll_ns = val; out: trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old); @@ -3492,11 +3489,16 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); - bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; ktime_t start, cur, poll_end; bool waited = false; + bool do_halt_poll; u64 halt_ns; + if (vcpu->halt_poll_ns > vcpu->kvm->max_halt_poll_ns) + vcpu->halt_poll_ns = vcpu->kvm->max_halt_poll_ns; + + do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; + start = cur = poll_end = ktime_get(); if (do_halt_poll) { ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns); From 175d5dc79dcb58d18e11192656eefa27abb3fd33 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 16 Nov 2022 16:16:56 -0800 Subject: [PATCH 383/963] KVM: Avoid re-reading kvm->max_halt_poll_ns during halt-polling Avoid re-reading kvm->max_halt_poll_ns multiple times during halt-polling except when it is explicitly useful, e.g. to check if the max time changed across a halt. kvm->max_halt_poll_ns can be changed at any time by userspace via KVM_CAP_HALT_POLL. This bug is unlikely to cause any serious side-effects. In the worst case one halt polls for shorter or longer than it should, and then is fixed up on the next halt. Furthmore, this is still possible since kvm->max_halt_poll_ns are not synchronized with halts. Fixes: acd05785e48c ("kvm: add capability for halt polling") Signed-off-by: David Matlack Message-Id: <20221117001657.1067231-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2a7285c14013..032d2fa301f5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3480,6 +3480,11 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, } } +static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->kvm->max_halt_poll_ns); +} + /* * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc... If halt * polling is enabled, busy wait for a short time before blocking to avoid the @@ -3488,14 +3493,15 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, */ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) { + unsigned int max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); bool halt_poll_allowed = !kvm_arch_no_poll(vcpu); ktime_t start, cur, poll_end; bool waited = false; bool do_halt_poll; u64 halt_ns; - if (vcpu->halt_poll_ns > vcpu->kvm->max_halt_poll_ns) - vcpu->halt_poll_ns = vcpu->kvm->max_halt_poll_ns; + if (vcpu->halt_poll_ns > max_halt_poll_ns) + vcpu->halt_poll_ns = max_halt_poll_ns; do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns; @@ -3537,18 +3543,21 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu) update_halt_poll_stats(vcpu, start, poll_end, !waited); if (halt_poll_allowed) { + /* Recompute the max halt poll time in case it changed. */ + max_halt_poll_ns = kvm_vcpu_max_halt_poll_ns(vcpu); + if (!vcpu_valid_wakeup(vcpu)) { shrink_halt_poll_ns(vcpu); - } else if (vcpu->kvm->max_halt_poll_ns) { + } else if (max_halt_poll_ns) { if (halt_ns <= vcpu->halt_poll_ns) ; /* we had a long block, shrink polling */ else if (vcpu->halt_poll_ns && - halt_ns > vcpu->kvm->max_halt_poll_ns) + halt_ns > max_halt_poll_ns) shrink_halt_poll_ns(vcpu); /* we had a short halt and our poll time is too small */ - else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns && - halt_ns < vcpu->kvm->max_halt_poll_ns) + else if (vcpu->halt_poll_ns < max_halt_poll_ns && + halt_ns < max_halt_poll_ns) grow_halt_poll_ns(vcpu); } else { vcpu->halt_poll_ns = 0; From 9eb8ca049c23afb0410f4a7b9a7158f1a0a3ad0e Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 16 Nov 2022 16:16:57 -0800 Subject: [PATCH 384/963] KVM: Obey kvm.halt_poll_ns in VMs not using KVM_CAP_HALT_POLL Obey kvm.halt_poll_ns in VMs not using KVM_CAP_HALT_POLL on every halt, rather than just sampling the module parameter when the VM is first created. This restore the original behavior of kvm.halt_poll_ns for VMs that have not opted into KVM_CAP_HALT_POLL. Notably, this change restores the ability for admins to disable or change the maximum halt-polling time system wide for VMs not using KVM_CAP_HALT_POLL. Reported-by: Christian Borntraeger Fixes: acd05785e48c ("kvm: add capability for halt polling") Signed-off-by: David Matlack Message-Id: <20221117001657.1067231-4-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 18592bdf4c1b..637a60607c7d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -776,6 +776,7 @@ struct kvm { struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; + bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool vm_bugged; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 032d2fa301f5..fab4d3790578 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1198,8 +1198,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) goto out_err_no_arch_destroy_vm; } - kvm->max_halt_poll_ns = halt_poll_ns; - r = kvm_arch_init_vm(kvm, type); if (r) goto out_err_no_arch_destroy_vm; @@ -3482,7 +3480,20 @@ static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start, static unsigned int kvm_vcpu_max_halt_poll_ns(struct kvm_vcpu *vcpu) { - return READ_ONCE(vcpu->kvm->max_halt_poll_ns); + struct kvm *kvm = vcpu->kvm; + + if (kvm->override_halt_poll_ns) { + /* + * Ensure kvm->max_halt_poll_ns is not read before + * kvm->override_halt_poll_ns. + * + * Pairs with the smp_wmb() when enabling KVM_CAP_HALT_POLL. + */ + smp_rmb(); + return READ_ONCE(kvm->max_halt_poll_ns); + } + + return READ_ONCE(halt_poll_ns); } /* @@ -4592,6 +4603,16 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return -EINVAL; kvm->max_halt_poll_ns = cap->args[0]; + + /* + * Ensure kvm->override_halt_poll_ns does not become visible + * before kvm->max_halt_poll_ns. + * + * Pairs with the smp_rmb() in kvm_vcpu_max_halt_poll_ns(). + */ + smp_wmb(); + kvm->override_halt_poll_ns = true; + return 0; } case KVM_CAP_DIRTY_LOG_RING: From 917401f26a6af5756d89b550a8e1bd50cf42b07e Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:43 +0200 Subject: [PATCH 385/963] KVM: x86: nSVM: leave nested mode on vCPU free If the VM was terminated while nested, we free the nested state while the vCPU still is in nested mode. Soon a warning will be added for this condition. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9f88c8e6766e..098f04bec8ef 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1438,6 +1438,7 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) */ svm_clear_current_vmcb(svm->vmcb); + svm_leave_nested(vcpu); svm_free_nested(svm); sev_free_vcpu(vcpu); From 16ae56d7e0528559bf8dc9070e3bfd8ba3de80df Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:44 +0200 Subject: [PATCH 386/963] KVM: x86: nSVM: harden svm_free_nested against freeing vmcb02 while still in use Make sure that KVM uses vmcb01 before freeing nested state, and warn if that is not the case. This is a minimal fix for CVE-2022-3344 making the kernel print a warning instead of a kernel panic. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 4c620999d230..b02a3a1792f1 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1125,6 +1125,9 @@ void svm_free_nested(struct vcpu_svm *svm) if (!svm->nested.initialized) return; + if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr)) + svm_switch_vmcb(svm, &svm->vmcb01); + svm_vcpu_free_msrpm(svm->nested.msrpm); svm->nested.msrpm = NULL; From f9697df251438b0798780900e8b43bdb12a56d64 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:45 +0200 Subject: [PATCH 387/963] KVM: x86: add kvm_leave_nested add kvm_leave_nested which wraps a call to nested_ops->leave_nested into a function. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 --- arch/x86/kvm/vmx/nested.c | 3 --- arch/x86/kvm/x86.c | 8 +++++++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b02a3a1792f1..7354f0035a69 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1146,9 +1146,6 @@ void svm_free_nested(struct vcpu_svm *svm) svm->nested.initialized = false; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void svm_leave_nested(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0c62352dda6a..f7333b9cdfbc 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6440,9 +6440,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, return kvm_state.size; } -/* - * Forcibly leave nested mode in order to be able to reset the VCPU later on. - */ void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ecea83f0da49..ff5be7189237 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -628,6 +628,12 @@ static void kvm_queue_exception_vmexit(struct kvm_vcpu *vcpu, unsigned int vecto ex->payload = payload; } +/* Forcibly leave the nested mode in cases like a vCPU reset */ +static void kvm_leave_nested(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops.nested_ops->leave_nested(vcpu); +} + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, bool has_payload, unsigned long payload, bool reinject) @@ -5195,7 +5201,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & KVM_VCPUEVENT_VALID_SMM) { if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { - kvm_x86_ops.nested_ops->leave_nested(vcpu); + kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); } From ed129ec9057f89d615ba0c81a4984a90345a1684 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:46 +0200 Subject: [PATCH 388/963] KVM: x86: forcibly leave nested mode on vCPU reset While not obivous, kvm_vcpu_reset() leaves the nested mode by clearing 'vcpu->arch.hflags' but it does so without all the required housekeeping. On SVM, it is possible to have a vCPU reset while in guest mode because unlike VMX, on SVM, INIT's are not latched in SVM non root mode and in addition to that L1 doesn't have to intercept triple fault, which should also trigger L1's reset if happens in L2 while L1 didn't intercept it. If one of the above conditions happen, KVM will continue to use vmcb02 while not having in the guest mode. Later the IA32_EFER will be cleared which will lead to freeing of the nested guest state which will (correctly) free the vmcb02, but since KVM still uses it (incorrectly) this will lead to a use after free and kernel crash. This issue is assigned CVE-2022-3344 Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ff5be7189237..597d7f804d72 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12003,8 +12003,18 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) WARN_ON_ONCE(!init_event && (old_cr0 || kvm_read_cr3(vcpu) || kvm_read_cr4(vcpu))); + /* + * SVM doesn't unconditionally VM-Exit on INIT and SHUTDOWN, thus it's + * possible to INIT the vCPU while L2 is active. Force the vCPU back + * into L1 as EFER.SVME is cleared on INIT (along with all other EFER + * bits), i.e. virtualization is disabled. + */ + if (is_guest_mode(vcpu)) + kvm_leave_nested(vcpu); + kvm_lapic_reset(vcpu, init_event); + WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu)); vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; From fc6392d51d4810e4b611ef7dabd594756e5a2406 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:47 +0200 Subject: [PATCH 389/963] KVM: selftests: move idt_entry to header struct idt_entry will be used for a test which will break IDT on purpose. Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-6-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/include/x86_64/processor.h | 13 +++++++++++++ tools/testing/selftests/kvm/lib/x86_64/processor.c | 13 ------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e8ca0d8a6a7e..5da0c5e2a7af 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -748,6 +748,19 @@ struct ex_regs { uint64_t rflags; }; +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..41c1c73c464d 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,19 +1074,6 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) } } -struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; -}; - static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { From 0bd2d3f48704d9d00c29fa97fb80de012af87a0a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:48 +0200 Subject: [PATCH 390/963] kvm: selftests: add svm nested shutdown test Add test that tests that on SVM if L1 doesn't intercept SHUTDOWN, then L2 crashes L1 and doesn't crash L2 Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-7-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../kvm/x86_64/svm_nested_shutdown_test.c | 67 +++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..05d980fb083d 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -41,6 +41,7 @@ /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test /x86_64/svm_nested_soft_inject_test +/x86_64/svm_nested_shutdown_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/tsc_scaling_sync diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..4a2caef2c939 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -101,6 +101,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c new file mode 100644 index 000000000000..e73fcdef47bb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_nested_shutdown_test + * + * Copyright (C) 2022, Red Hat, Inc. + * + * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("ud2"); +} + +static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + idt[6].p = 0; // #UD is intercepted but its injection will cause #NP + idt[11].p = 0; // #NP is not intercepted and will cause another + // #NP that will be converted to #DF + idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + vm_vaddr_t svm_gva; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vcpu_alloc_svm(vm, &svm_gva); + + vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + kvm_vm_free(vm); +} From 92e7d5c83aff124f49082585e57939ed24b59c5c Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:49 +0200 Subject: [PATCH 391/963] KVM: x86: allow L1 to not intercept triple fault This is SVM correctness fix - although a sane L1 would intercept SHUTDOWN event, it doesn't have to, so we have to honour this. Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-8-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 6 ++++++ arch/x86/kvm/vmx/nested.c | 1 + arch/x86/kvm/x86.c | 11 ++++++----- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 7354f0035a69..995bc0f90759 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1091,6 +1091,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN)) + return; + + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f7333b9cdfbc..5b0d4859e4b7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4854,6 +4854,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu) { + kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu); nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 597d7f804d72..5384c567f68f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9811,7 +9811,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) int kvm_check_nested_events(struct kvm_vcpu *vcpu) { - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { kvm_x86_ops.nested_ops->triple_fault(vcpu); return 1; } @@ -10566,15 +10566,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { - if (is_guest_mode(vcpu)) { + if (kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { + if (is_guest_mode(vcpu)) kvm_x86_ops.nested_ops->triple_fault(vcpu); - } else { + + if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; - goto out; } + goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ From 8357b9e19bbb5c9ce671c7b6cb93e03fc0fe4016 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:50 +0200 Subject: [PATCH 392/963] KVM: selftests: add svm part to triple_fault_test Add a SVM implementation to triple_fault_test to test that emulated/injected shutdown works. Since instead of the VMX, the SVM allows the hypervisor to avoid intercepting shutdown in guest, don't intercept shutdown to test that KVM suports this correctly. Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-9-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- .../kvm/x86_64/triple_fault_event_test.c | 73 ++++++++++++++----- 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c index 70b44f0b52fe..ead5d878a71c 100644 --- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -3,6 +3,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -20,10 +21,11 @@ static void l2_guest_code(void) : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); } -void l1_guest_code(struct vmx_pages *vmx) -{ #define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; +unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +void l1_guest_code_vmx(struct vmx_pages *vmx) +{ GUEST_ASSERT(vmx->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx)); @@ -38,24 +40,53 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_DONE(); } +void l1_guest_code_svm(struct svm_test_data *svm) +{ + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* don't intercept shutdown to test the case of SVM allowing to do so */ + vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN)); + + run_guest(vmcb, svm->vmcb_gpa); + + /* should not reach here, L1 should crash */ + GUEST_ASSERT(0); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vcpu_events events; - vm_vaddr_t vmx_pages_gva; struct ucall uc; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX); + bool has_svm = kvm_cpu_has(X86_FEATURE_SVM); + + TEST_REQUIRE(has_vmx || has_svm); TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + if (has_vmx) { + vm_vaddr_t vmx_pages_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + } else { + vm_vaddr_t svm_gva; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); + } + + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); run = vcpu->run; - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -78,13 +109,21 @@ int main(void) "No triple fault pending"); vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_DONE: - break; - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } + if (has_svm) { + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + } else { + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } + return 0; } From 05311ce954aebe75935d9ae7d38ac82b5b796e33 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 3 Nov 2022 16:13:51 +0200 Subject: [PATCH 393/963] KVM: x86: remove exit_int_info warning in svm_handle_exit It is valid to receive external interrupt and have broken IDT entry, which will lead to #GP with exit_int_into that will contain the index of the IDT entry (e.g any value). Other exceptions can happen as well, like #NP or #SS (if stack switch fails). Thus this warning can be user triggred and has very little value. Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Message-Id: <20221103141351.50662-10-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 098f04bec8ef..c0950ae86b2b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -346,12 +346,6 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return 0; } -static int is_external_interrupt(u32 info) -{ - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); -} - static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3426,15 +3420,6 @@ static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) return 0; } - if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && - exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) - printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " - "exit_code 0x%x\n", - __func__, svm->vmcb->control.exit_int_info, - exit_code); - if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; From fb15c7c8b02354464e3a1c54d9a1700e3470c7a2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 27 Oct 2022 23:48:03 +0000 Subject: [PATCH 394/963] ANDROID: Add ashmem ioctl to return a unique file identifier This will allow a client program to avoid redundant actions on ashmem buffers which it has already seen. Bug: 244233389 Change-Id: Ica57a8842ff163eae5f9eca8141b439091ec0940 Signed-off-by: Mark Fasheh --- drivers/staging/android/ashmem.c | 18 ++++++++++++++++++ drivers/staging/android/uapi/ashmem.h | 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index ce1e2abcbd97..feadf798736d 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -820,6 +820,7 @@ static int ashmem_pin_unpin(struct ashmem_area *asma, unsigned long cmd, static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ashmem_area *asma = file->private_data; + unsigned long ino; long ret = -ENOTTY; switch (cmd) { @@ -863,6 +864,23 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ashmem_shrink_scan(&ashmem_shrinker, &sc); } break; + case ASHMEM_GET_FILE_ID: + /* Lock around our check to avoid racing with ashmem_mmap(). */ + mutex_lock(&ashmem_mutex); + if (!asma || !asma->file) { + mutex_unlock(&ashmem_mutex); + ret = -EINVAL; + break; + } + ino = file_inode(asma->file)->i_ino; + mutex_unlock(&ashmem_mutex); + + if (copy_to_user((void __user *)arg, &ino, sizeof(ino))) { + ret = -EFAULT; + break; + } + ret = 0; + break; } return ret; diff --git a/drivers/staging/android/uapi/ashmem.h b/drivers/staging/android/uapi/ashmem.h index 134efacb3219..f962a5f38fde 100644 --- a/drivers/staging/android/uapi/ashmem.h +++ b/drivers/staging/android/uapi/ashmem.h @@ -39,5 +39,6 @@ struct ashmem_pin { #define ASHMEM_UNPIN _IOW(__ASHMEMIOC, 8, struct ashmem_pin) #define ASHMEM_GET_PIN_STATUS _IO(__ASHMEMIOC, 9) #define ASHMEM_PURGE_ALL_CACHES _IO(__ASHMEMIOC, 10) +#define ASHMEM_GET_FILE_ID _IOR(__ASHMEMIOC, 11, unsigned long) #endif /* _UAPI_LINUX_ASHMEM_H */ From 7d21fcfb409500dc9b114567f0ef8d30b3190dee Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 11 Nov 2022 10:44:49 +0900 Subject: [PATCH 395/963] scsi: mpi3mr: Suppress command reply debug prints After it receives command reply, mpi3mr driver checks command result. If the result is not zero, it prints out command information. This debug information is confusing since they are printed even when the non-zero result is expected. "Power-on or device reset occurred" is printed for Test Unit Ready command at drive detection. Inquiry failure for unsupported VPD page header is also printed. They are harmless but look like failures. To avoid the confusion, print the command reply debug information only when the module parameter logging_level has value MPI3_DEBUG_SCSI_ERROR= 64, in same manner as mpt3sas driver. Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221111014449.1649968-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index f77ee4051b00..3306de7170f6 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3265,7 +3265,8 @@ void mpi3mr_process_op_reply_desc(struct mpi3mr_ioc *mrioc, } if (scmd->result != (DID_OK << 16) && (scmd->cmnd[0] != ATA_12) && - (scmd->cmnd[0] != ATA_16)) { + (scmd->cmnd[0] != ATA_16) && + mrioc->logging_level & MPI3_DEBUG_SCSI_ERROR) { ioc_info(mrioc, "%s :scmd->result 0x%x\n", __func__, scmd->result); scsi_print_command(scmd); From bc68e428d4963af0201e92159629ab96948f0893 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 15 Nov 2022 09:50:42 +0800 Subject: [PATCH 396/963] scsi: target: tcm_loop: Fix possible name leak in tcm_loop_setup_hba_bus() If device_register() fails in tcm_loop_setup_hba_bus(), the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). The 'tl_hba' will be freed in tcm_loop_release_adapter(), so it don't need goto error label in this case. Fixes: 3703b2c5d041 ("[SCSI] tcm_loop: Add multi-fabric Linux/SCSI LLD fabric module") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221115015042.3652261-1-yangyingliang@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/loopback/tcm_loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 4407b56aa6d1..139031ccb700 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -397,6 +397,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host ret = device_register(&tl_hba->dev); if (ret) { pr_err("device_register() failed for tl_hba->dev: %d\n", ret); + put_device(&tl_hba->dev); return -ENODEV; } @@ -1073,7 +1074,7 @@ static struct se_wwn *tcm_loop_make_scsi_hba( */ ret = tcm_loop_setup_hba_bus(tl_hba, tcm_loop_hba_no_cnt); if (ret) - goto out; + return ERR_PTR(ret); sh = tl_hba->sh; tcm_loop_hba_no_cnt++; From e208a1d795a08d1ac0398c79ad9c58106531bcc5 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 17 Nov 2022 08:44:21 +0000 Subject: [PATCH 397/963] scsi: scsi_debug: Fix possible UAF in sdebug_add_host_helper() If device_register() fails in sdebug_add_host_helper(), it will goto clean and sdbg_host will be freed, but sdbg_host->host_list will not be removed from sdebug_host_list, then list traversal may cause UAF. Fix it. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221117084421.58918-1-yuancan@huawei.com Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 629853662b82..bebda917b138 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -7323,8 +7323,12 @@ static int sdebug_add_host_helper(int per_host_idx) dev_set_name(&sdbg_host->dev, "adapter%d", sdebug_num_hosts); error = device_register(&sdbg_host->dev); - if (error) + if (error) { + spin_lock(&sdebug_host_list_lock); + list_del(&sdbg_host->host_list); + spin_unlock(&sdebug_host_list_lock); goto clean; + } ++sdebug_num_hosts; return 0; From 0954256e970ecf371b03a6c9af2cf91b9c4085ff Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Wed, 16 Nov 2022 11:50:37 +0100 Subject: [PATCH 398/963] scsi: zfcp: Fix double free of FSF request when qdio send fails We used to use the wrong type of integer in 'zfcp_fsf_req_send()' to cache the FSF request ID when sending a new FSF request. This is used in case the sending fails and we need to remove the request from our internal hash table again (so we don't keep an invalid reference and use it when we free the request again). In 'zfcp_fsf_req_send()' we used to cache the ID as 'int' (signed and 32 bit wide), but the rest of the zfcp code (and the firmware specification) handles the ID as 'unsigned long'/'u64' (unsigned and 64 bit wide [s390x ELF ABI]). For one this has the obvious problem that when the ID grows past 32 bit (this can happen reasonably fast) it is truncated to 32 bit when storing it in the cache variable and so doesn't match the original ID anymore. The second less obvious problem is that even when the original ID has not yet grown past 32 bit, as soon as the 32nd bit is set in the original ID (0x80000000 = 2'147'483'648) we will have a mismatch when we cast it back to 'unsigned long'. As the cached variable is of a signed type, the compiler will choose a sign-extending instruction to load the 32 bit variable into a 64 bit register (e.g.: 'lgf %r11,188(%r15)'). So once we pass the cached variable into 'zfcp_reqlist_find_rm()' to remove the request again all the leading zeros will be flipped to ones to extend the sign and won't match the original ID anymore (this has been observed in practice). If we can't successfully remove the request from the hash table again after 'zfcp_qdio_send()' fails (this happens regularly when zfcp cannot notify the adapter about new work because the adapter is already gone during e.g. a ChpID toggle) we will end up with a double free. We unconditionally free the request in the calling function when 'zfcp_fsf_req_send()' fails, but because the request is still in the hash table we end up with a stale memory reference, and once the zfcp adapter is either reset during recovery or shutdown we end up freeing the same memory twice. The resulting stack traces vary depending on the kernel and have no direct correlation to the place where the bug occurs. Here are three examples that have been seen in practice: list_del corruption. next->prev should be 00000001b9d13800, but was 00000000dead4ead. (next=00000001bd131a00) ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:62! monitor event: 0040 ilc:2 [#1] PREEMPT SMP Modules linked in: ... CPU: 9 PID: 1617 Comm: zfcperp0.0.1740 Kdump: loaded Hardware name: ... Krnl PSW : 0704d00180000000 00000003cbeea1f8 (__list_del_entry_valid+0x98/0x140) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 00000000916d12f1 0000000080000000 000000000000006d 00000003cb665cd6 0000000000000001 0000000000000000 0000000000000000 00000000d28d21e8 00000000d3844000 00000380099efd28 00000001bd131a00 00000001b9d13800 00000000d3290100 0000000000000000 00000003cbeea1f4 00000380099efc70 Krnl Code: 00000003cbeea1e8: c020004f68a7 larl %r2,00000003cc8d7336 00000003cbeea1ee: c0e50027fd65 brasl %r14,00000003cc3e9cb8 #00000003cbeea1f4: af000000 mc 0,0 >00000003cbeea1f8: c02000920440 larl %r2,00000003cd12aa78 00000003cbeea1fe: c0e500289c25 brasl %r14,00000003cc3fda48 00000003cbeea204: b9040043 lgr %r4,%r3 00000003cbeea208: b9040051 lgr %r5,%r1 00000003cbeea20c: b9040032 lgr %r3,%r2 Call Trace: [<00000003cbeea1f8>] __list_del_entry_valid+0x98/0x140 ([<00000003cbeea1f4>] __list_del_entry_valid+0x94/0x140) [<000003ff7ff502fe>] zfcp_fsf_req_dismiss_all+0xde/0x150 [zfcp] [<000003ff7ff49cd0>] zfcp_erp_strategy_do_action+0x160/0x280 [zfcp] [<000003ff7ff4a22e>] zfcp_erp_strategy+0x21e/0xca0 [zfcp] [<000003ff7ff4ad34>] zfcp_erp_thread+0x84/0x1a0 [zfcp] [<00000003cb5eece8>] kthread+0x138/0x150 [<00000003cb557f3c>] __ret_from_fork+0x3c/0x60 [<00000003cc4172ea>] ret_from_fork+0xa/0x40 INFO: lockdep is turned off. Last Breaking-Event-Address: [<00000003cc3e9d04>] _printk+0x4c/0x58 Kernel panic - not syncing: Fatal exception: panic_on_oops or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6803 Fault in home space mode while using kernel ASCE. AS:0000000063b10007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 10 PID: 0 Comm: swapper/10 Kdump: loaded Hardware name: ... Krnl PSW : 0404d00180000000 000003ff7febaf8e (zfcp_fsf_reqid_check+0x86/0x158 [zfcp]) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 5a6f1cfa89c49ac3 00000000aff2c4c8 6b6b6b6b6b6b6b6b 00000000000002a8 0000000000000000 0000000000000055 0000000000000000 00000000a8515800 0700000000000000 00000000a6e14500 00000000aff2c000 000000008003c44c 000000008093c700 0000000000000010 00000380009ebba8 00000380009ebb48 Krnl Code: 000003ff7febaf7e: a7f4003d brc 15,000003ff7febaff8 000003ff7febaf82: e32020000004 lg %r2,0(%r2) #000003ff7febaf88: ec2100388064 cgrj %r2,%r1,8,000003ff7febaff8 >000003ff7febaf8e: e3b020100020 cg %r11,16(%r2) 000003ff7febaf94: a774fff7 brc 7,000003ff7febaf82 000003ff7febaf98: ec280030007c cgij %r2,0,8,000003ff7febaff8 000003ff7febaf9e: e31020080004 lg %r1,8(%r2) 000003ff7febafa4: e33020000004 lg %r3,0(%r2) Call Trace: [<000003ff7febaf8e>] zfcp_fsf_reqid_check+0x86/0x158 [zfcp] [<000003ff7febbdbc>] zfcp_qdio_int_resp+0x6c/0x170 [zfcp] [<000003ff7febbf90>] zfcp_qdio_irq_tasklet+0xd0/0x108 [zfcp] [<0000000061d90a04>] tasklet_action_common.constprop.0+0xdc/0x128 [<000000006292f300>] __do_softirq+0x130/0x3c0 [<0000000061d906c6>] irq_exit_rcu+0xfe/0x118 [<000000006291e818>] do_io_irq+0xc8/0x168 [<000000006292d516>] io_int_handler+0xd6/0x110 [<000000006292d596>] psw_idle_exit+0x0/0xa ([<0000000061d3be50>] arch_cpu_idle+0x40/0xd0) [<000000006292ceea>] default_idle_call+0x52/0xf8 [<0000000061de4fa4>] do_idle+0xd4/0x168 [<0000000061de51fe>] cpu_startup_entry+0x36/0x40 [<0000000061d4faac>] smp_start_secondary+0x12c/0x138 [<000000006292d88e>] restart_int_handler+0x6e/0x90 Last Breaking-Event-Address: [<000003ff7febaf94>] zfcp_fsf_reqid_check+0x8c/0x158 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt or: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 523b05d3ae76a000 TEID: 523b05d3ae76a803 Fault in home space mode while using kernel ASCE. AS:0000000077c40007 R3:0000000000000024 Oops: 0038 ilc:3 [#1] SMP Modules linked in: ... CPU: 3 PID: 453 Comm: kworker/3:1H Kdump: loaded Hardware name: ... Workqueue: kblockd blk_mq_run_work_fn Krnl PSW : 0404d00180000000 0000000076fc0312 (__kmalloc+0xd2/0x398) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: ffffffffffffffff 523b05d3ae76abf6 0000000000000000 0000000000092a20 0000000000000002 00000007e49b5cc0 00000007eda8f000 0000000000092a20 00000007eda8f000 00000003b02856b9 00000000000000a8 523b05d3ae76abf6 00000007dd662000 00000007eda8f000 0000000076fc02b2 000003e0037637a0 Krnl Code: 0000000076fc0302: c004000000d4 brcl 0,76fc04aa 0000000076fc0308: b904001b lgr %r1,%r11 #0000000076fc030c: e3106020001a algf %r1,32(%r6) >0000000076fc0312: e31010000082 xg %r1,0(%r1) 0000000076fc0318: b9040001 lgr %r0,%r1 0000000076fc031c: e30061700082 xg %r0,368(%r6) 0000000076fc0322: ec59000100d9 aghik %r5,%r9,1 0000000076fc0328: e34003b80004 lg %r4,952 Call Trace: [<0000000076fc0312>] __kmalloc+0xd2/0x398 [<0000000076f318f2>] mempool_alloc+0x72/0x1f8 [<000003ff8027c5f8>] zfcp_fsf_req_create.isra.7+0x40/0x268 [zfcp] [<000003ff8027f1bc>] zfcp_fsf_fcp_cmnd+0xac/0x3f0 [zfcp] [<000003ff80280f1a>] zfcp_scsi_queuecommand+0x122/0x1d0 [zfcp] [<000003ff800b4218>] scsi_queue_rq+0x778/0xa10 [scsi_mod] [<00000000771782a0>] __blk_mq_try_issue_directly+0x130/0x208 [<000000007717a124>] blk_mq_request_issue_directly+0x4c/0xa8 [<000003ff801302e2>] dm_mq_queue_rq+0x2ea/0x468 [dm_mod] [<0000000077178c12>] blk_mq_dispatch_rq_list+0x33a/0x818 [<000000007717f064>] __blk_mq_do_dispatch_sched+0x284/0x2f0 [<000000007717f44c>] __blk_mq_sched_dispatch_requests+0x1c4/0x218 [<000000007717fa7a>] blk_mq_sched_dispatch_requests+0x52/0x90 [<0000000077176d74>] __blk_mq_run_hw_queue+0x9c/0xc0 [<0000000076da6d74>] process_one_work+0x274/0x4d0 [<0000000076da7018>] worker_thread+0x48/0x560 [<0000000076daef18>] kthread+0x140/0x160 [<000000007751d144>] ret_from_fork+0x28/0x30 Last Breaking-Event-Address: [<0000000076fc0474>] __kmalloc+0x234/0x398 Kernel panic - not syncing: Fatal exception: panic_on_oops To fix this, simply change the type of the cache variable to 'unsigned long', like the rest of zfcp and also the argument for 'zfcp_reqlist_find_rm()'. This prevents truncation and wrong sign extension and so can successfully remove the request from the hash table. Fixes: e60a6d69f1f8 ("[SCSI] zfcp: Remove function zfcp_reqlist_find_safe") Cc: #v2.6.34+ Signed-off-by: Benjamin Block Link: https://lore.kernel.org/r/979f6e6019d15f91ba56182f1aaf68d61bf37fc6.1668595505.git.bblock@linux.ibm.com Reviewed-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fsf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 19223b075568..ab3ea529cca7 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -884,7 +884,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); struct zfcp_adapter *adapter = req->adapter; struct zfcp_qdio *qdio = adapter->qdio; - int req_id = req->req_id; + unsigned long req_id = req->req_id; zfcp_reqlist_add(adapter->req_list, req); From f014165faa7b953b81dcbf18835936e5f8d01f2a Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Thu, 10 Nov 2022 03:37:29 +0000 Subject: [PATCH 399/963] scsi: iscsi: Fix possible memory leak when device_register() failed If device_register() returns error, the name allocated by the dev_set_name() need be freed. As described in the comment of device_register(), we should use put_device() to give up the reference in the error path. Fix this by calling put_device(), the name will be freed in the kobject_cleanup(), and this patch modified resources will be released by calling the corresponding callback function in the device_release(). Signed-off-by: Zhou Guanghui Link: https://lore.kernel.org/r/20221110033729.1555-1-zhouguanghui1@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 31 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index cd3db9684e52..f473c002fa4d 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -231,7 +231,7 @@ iscsi_create_endpoint(int dd_size) dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) - goto free_id; + goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) @@ -245,10 +245,12 @@ iscsi_create_endpoint(int dd_size) device_unregister(&ep->dev); return NULL; -free_id: +put_dev: mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, id); mutex_unlock(&iscsi_ep_idr_mutex); + put_device(&ep->dev); + return NULL; free_ep: kfree(ep); return NULL; @@ -766,7 +768,7 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, err = device_register(&iface->dev); if (err) - goto free_iface; + goto put_dev; err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); if (err) @@ -780,9 +782,8 @@ iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, device_unregister(&iface->dev); return NULL; -free_iface: - put_device(iface->dev.parent); - kfree(iface); +put_dev: + put_device(&iface->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_iface); @@ -1251,15 +1252,15 @@ iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index, err = device_register(&fnode_sess->dev); if (err) - goto free_fnode_sess; + goto put_dev; if (dd_size) fnode_sess->dd_data = &fnode_sess[1]; return fnode_sess; -free_fnode_sess: - kfree(fnode_sess); +put_dev: + put_device(&fnode_sess->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess); @@ -1299,15 +1300,15 @@ iscsi_create_flashnode_conn(struct Scsi_Host *shost, err = device_register(&fnode_conn->dev); if (err) - goto free_fnode_conn; + goto put_dev; if (dd_size) fnode_conn->dd_data = &fnode_conn[1]; return fnode_conn; -free_fnode_conn: - kfree(fnode_conn); +put_dev: + put_device(&fnode_conn->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn); @@ -4815,7 +4816,7 @@ iscsi_register_transport(struct iscsi_transport *tt) dev_set_name(&priv->dev, "%s", tt->name); err = device_register(&priv->dev); if (err) - goto free_priv; + goto put_dev; err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group); if (err) @@ -4850,8 +4851,8 @@ iscsi_register_transport(struct iscsi_transport *tt) unregister_dev: device_unregister(&priv->dev); return NULL; -free_priv: - kfree(priv); +put_dev: + put_device(&priv->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_register_transport); From b98186aee22fa593bc8c6b2c5d839c2ee518bc8c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Nov 2022 18:40:14 +0000 Subject: [PATCH 400/963] io_uring: update res mask in io_poll_check_events When io_poll_check_events() collides with someone attempting to queue a task work, it'll spin for one more time. However, it'll continue to use the mask from the first iteration instead of updating it. For example, if the first wake up was a EPOLLIN and the second EPOLLOUT, the userspace will not get EPOLLOUT in time. Clear the mask for all subsequent iterations to force vfs_poll(). Cc: stable@vger.kernel.org Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/2dac97e8f691231049cb259c4ae57e79e40b537c.1668710222.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/poll.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index f500506984ec..90920abf91ff 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -258,6 +258,9 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return ret; } + /* force the next iteration to vfs_poll() */ + req->cqe.res = 0; + /* * Release all references, retry if someone tried to restart * task_work while we were executing it. From 539bcb57da2f58886d7d5c17134236b0ec9cd15d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Nov 2022 18:40:15 +0000 Subject: [PATCH 401/963] io_uring: fix tw losing poll events We may never try to process a poll wake and its mask if there was multiple wake ups racing for queueing up a tw. Force io_poll_check_events() to update the mask by vfs_poll(). Cc: stable@vger.kernel.org Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/00344d60f8b18907171178d7cf598de71d127b0b.1668710222.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/poll.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index 90920abf91ff..c34019b18211 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -228,6 +228,13 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_DONE; if (v & IO_POLL_CANCEL_FLAG) return -ECANCELED; + /* + * cqe.res contains only events of the first wake up + * and all others are be lost. Redo vfs_poll() to get + * up to date state. + */ + if ((v & IO_POLL_REF_MASK) != 1) + req->cqe.res = 0; /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { From 91482864768a874c4290ef93b84a78f4f1dac51b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Nov 2022 18:40:16 +0000 Subject: [PATCH 402/963] io_uring: fix multishot accept request leaks Having REQ_F_POLLED set doesn't guarantee that the request is executed as a multishot from the polling path. Fortunately for us, if the code thinks it's multishot issue when it's not, it can only ask to skip completion so leaking the request. Use issue_flags to mark multipoll issues. Cc: stable@vger.kernel.org Fixes: 390ed29b5e425 ("io_uring: add IORING_ACCEPT_MULTISHOT for accept") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7700ac57653f2823e30b34dc74da68678c0c5f13.1668710222.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 3 +++ io_uring/io_uring.c | 2 +- io_uring/io_uring.h | 4 ++-- io_uring/net.c | 7 ++----- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 43bc8a2edccf..0ded9e271523 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -16,6 +16,9 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = 4, IO_URING_F_CQE32 = 8, IO_URING_F_IOPOLL = 16, + + /* the request is executed from poll, it should not be freed */ + IO_URING_F_MULTISHOT = 32, }; struct io_uring_cmd { diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4a1e482747cc..8840cf3e20f2 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1768,7 +1768,7 @@ int io_poll_issue(struct io_kiocb *req, bool *locked) io_tw_lock(req->ctx, locked); if (unlikely(req->task->flags & PF_EXITING)) return -EFAULT; - return io_issue_sqe(req, IO_URING_F_NONBLOCK); + return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT); } struct io_wq_work *io_wq_free_work(struct io_wq_work *work) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index e99a79f2df9b..cef5ff924e63 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -17,8 +17,8 @@ enum { IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED, /* - * Intended only when both REQ_F_POLLED and REQ_F_APOLL_MULTISHOT - * are set to indicate to the poll runner that multishot should be + * Intended only when both IO_URING_F_MULTISHOT is passed + * to indicate to the poll runner that multishot should be * removed and the result is set on req->cqe.res. */ IOU_STOP_MULTISHOT = -ECANCELED, diff --git a/io_uring/net.c b/io_uring/net.c index 15dea91625e2..a390d3ea486c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1289,8 +1289,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags) * return EAGAIN to arm the poll infra since it * has already been done */ - if ((req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) ret = IOU_ISSUE_SKIP_COMPLETE; return ret; } @@ -1315,9 +1314,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags) goto retry; io_req_set_res(req, ret, 0); - if (req->flags & REQ_F_POLLED) - return IOU_STOP_MULTISHOT; - return IOU_OK; + return (issue_flags & IO_URING_F_MULTISHOT) ? IOU_STOP_MULTISHOT : IOU_OK; } int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) From 100d6b17c06ee4c2b42fdddf0fe4ab77c86eb77e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 17 Nov 2022 18:40:17 +0000 Subject: [PATCH 403/963] io_uring: fix multishot recv request leaks Having REQ_F_POLLED set doesn't guarantee that the request is executed as a multishot from the polling path. Fortunately for us, if the code thinks it's multishot issue when it's not, it can only ask to skip completion so leaking the request. Use issue_flags to mark multipoll issues. Cc: stable@vger.kernel.org Fixes: 1300ebb20286b ("io_uring: multishot recv") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/37762040ba9c52b81b92a2f5ebfd4ee484088951.1668710222.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index a390d3ea486c..ab83da7e80f0 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -67,8 +67,6 @@ struct io_sr_msg { struct io_kiocb *notif; }; -#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) - int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -591,7 +589,8 @@ static inline void io_recv_prep_retry(struct io_kiocb *req) * again (for multishot). */ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, - unsigned int cflags, bool mshot_finished) + unsigned int cflags, bool mshot_finished, + unsigned issue_flags) { if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { io_req_set_res(req, *ret, cflags); @@ -614,7 +613,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, io_req_set_res(req, *ret, cflags); - if (req->flags & REQ_F_POLLED) + if (issue_flags & IO_URING_F_MULTISHOT) *ret = IOU_STOP_MULTISHOT; else *ret = IOU_OK; @@ -773,8 +772,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { ret = io_setup_async_msg(req, kmsg, issue_flags); - if (ret == -EAGAIN && (req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_MULTISHOT)) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -803,7 +801,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (kmsg->msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, mshot_finished)) + if (!io_recv_finish(req, &ret, cflags, mshot_finished, issue_flags)) goto retry_multishot; if (mshot_finished) { @@ -869,7 +867,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) ret = sock_recvmsg(sock, &msg, flags); if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) { - if ((req->flags & IO_APOLL_MULTI_POLLED) == IO_APOLL_MULTI_POLLED) { + if (issue_flags & IO_URING_F_MULTISHOT) { io_kbuf_recycle(req, issue_flags); return IOU_ISSUE_SKIP_COMPLETE; } @@ -902,7 +900,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) if (msg.msg_inq) cflags |= IORING_CQE_F_SOCK_NONEMPTY; - if (!io_recv_finish(req, &ret, cflags, ret <= 0)) + if (!io_recv_finish(req, &ret, cflags, ret <= 0, issue_flags)) goto retry_multishot; return ret; From ac9ccce8717df5aa5361c0cea9d8619f6d87af3c Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 16 Nov 2022 15:53:37 -0600 Subject: [PATCH 404/963] MAINTAINERS: Include PCI bindings in host bridge entry Almost all PCI bindings are controller bindings, so list them under the PCI native host bridge and endpoint entry. Link: https://lore.kernel.org/r/20221116215337.1032890-1-robh@kernel.org Signed-off-by: Rob Herring Signed-off-by: Bjorn Helgaas Cc: Lorenzo Pieralisi --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 05ca3a7f6967..b10f8c9faa46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15930,6 +15930,7 @@ Q: https://patchwork.kernel.org/project/linux-pci/list/ B: https://bugzilla.kernel.org C: irc://irc.oftc.net/linux-pci T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git +F: Documentation/devicetree/bindings/pci/ F: drivers/pci/controller/ F: drivers/pci/pci-bridge-emul.c F: drivers/pci/pci-bridge-emul.h From 08948caebe93482db1adfd2154eba124f66d161d Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Wed, 9 Nov 2022 09:44:32 +0000 Subject: [PATCH 405/963] ftrace: Fix the possible incorrect kernel message If the number of mcount entries is an integer multiple of ENTRIES_PER_PAGE, the page count showing on the console would be wrong. Link: https://lkml.kernel.org/r/20221109094434.84046-2-wangwensheng4@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: 5821e1b74f0d0 ("function tracing: fix wrong pos computing when read buffer has been fulfilled") Signed-off-by: Wang Wensheng Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7dc023641bf1..8b13ce2eae70 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7391,7 +7391,7 @@ void __init ftrace_init(void) } pr_info("ftrace: allocating %ld entries in %ld pages\n", - count, count / ENTRIES_PER_PAGE + 1); + count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE)); ret = ftrace_process_locs(NULL, __start_mcount_loc, From bcea02b096333dc74af987cb9685a4dbdd820840 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Wed, 9 Nov 2022 09:44:33 +0000 Subject: [PATCH 406/963] ftrace: Optimize the allocation for mcount entries If we can't allocate this size, try something smaller with half of the size. Its order should be decreased by one instead of divided by two. Link: https://lkml.kernel.org/r/20221109094434.84046-3-wangwensheng4@huawei.com Cc: Cc: Cc: stable@vger.kernel.org Fixes: a79008755497d ("ftrace: Allocate the mcount record pages as groups") Signed-off-by: Wang Wensheng Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8b13ce2eae70..56a168121bfc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3190,7 +3190,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) /* if we can't allocate this size, try something smaller */ if (!order) return -ENOMEM; - order >>= 1; + order--; goto again; } From 56f4ca0a79a9f1af98f26c54b9b89ba1f9bcc6bd Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 14 Nov 2022 17:31:29 +0300 Subject: [PATCH 407/963] ring_buffer: Do not deactivate non-existant pages rb_head_page_deactivate() expects cpu_buffer to contain a valid list of ->pages, so verify that the list is actually present before calling it. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Link: https://lkml.kernel.org/r/20221114143129.3534443-1-d-tatianin@yandex-team.ru Cc: stable@vger.kernel.org Fixes: 77ae365eca895 ("ring-buffer: make lockless") Signed-off-by: Daniil Tatianin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a19369c4d8df..b21bf14bae9b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1802,9 +1802,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) free_buffer_page(cpu_buffer->reader_page); - rb_head_page_deactivate(cpu_buffer); - if (head) { + rb_head_page_deactivate(cpu_buffer); + list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); free_buffer_page(bpage); From 19ba6c8af9382c4c05dc6a0a79af3013b9a35cd0 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 16 Nov 2022 09:52:07 +0800 Subject: [PATCH 408/963] ftrace: Fix null pointer dereference in ftrace_add_mod() The @ftrace_mod is allocated by kzalloc(), so both the members {prev,next} of @ftrace_mode->list are NULL, it's not a valid state to call list_del(). If kstrdup() for @ftrace_mod->{func|module} fails, it goes to @out_free tag and calls free_ftrace_mod() to destroy @ftrace_mod, then list_del() will write prev->next and next->prev, where null pointer dereference happens. BUG: kernel NULL pointer dereference, address: 0000000000000008 Oops: 0002 [#1] PREEMPT SMP NOPTI Call Trace: ftrace_mod_callback+0x20d/0x220 ? do_filp_open+0xd9/0x140 ftrace_process_regex.isra.51+0xbf/0x130 ftrace_regex_write.isra.52.part.53+0x6e/0x90 vfs_write+0xee/0x3a0 ? __audit_filter_op+0xb1/0x100 ? auditd_test_task+0x38/0x50 ksys_write+0xa5/0xe0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Kernel panic - not syncing: Fatal exception So call INIT_LIST_HEAD() to initialize the list member to fix this issue. Link: https://lkml.kernel.org/r/20221116015207.30858-1-xiujianfeng@huawei.com Cc: stable@vger.kernel.org Fixes: 673feb9d76ab ("ftrace: Add :mod: caching infrastructure to trace_array") Signed-off-by: Xiu Jianfeng Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 56a168121bfc..33236241f236 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1289,6 +1289,7 @@ static int ftrace_add_mod(struct trace_array *tr, if (!ftrace_mod) return -ENOMEM; + INIT_LIST_HEAD(&ftrace_mod->list); ftrace_mod->func = kstrdup(func, GFP_KERNEL); ftrace_mod->module = kstrdup(module, GFP_KERNEL); ftrace_mod->enable = enable; From a4527fef9afe5c903c718d0cd24609fe9c754250 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 09:23:45 +0800 Subject: [PATCH 409/963] tracing: Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event() test_gen_synth_cmd() only free buf in fail path, hence buf will leak when there is no failure. Add kfree(buf) to prevent the memleak. The same reason and solution in test_empty_synth_event(). unreferenced object 0xffff8881127de000 (size 2048): comm "modprobe", pid 247, jiffies 4294972316 (age 78.756s) hex dump (first 32 bytes): 20 67 65 6e 5f 73 79 6e 74 68 5f 74 65 73 74 20 gen_synth_test 20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69 64 5f pid_t next_pid_ backtrace: [<000000004254801a>] kmalloc_trace+0x26/0x100 [<0000000039eb1cf5>] 0xffffffffa00083cd [<000000000e8c3bc8>] 0xffffffffa00086ba [<00000000c293d1ea>] do_one_initcall+0xdb/0x480 [<00000000aa189e6d>] do_init_module+0x1cf/0x680 [<00000000d513222b>] load_module+0x6a50/0x70a0 [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0 [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90 [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd unreferenced object 0xffff8881127df000 (size 2048): comm "modprobe", pid 247, jiffies 4294972324 (age 78.728s) hex dump (first 32 bytes): 20 65 6d 70 74 79 5f 73 79 6e 74 68 5f 74 65 73 empty_synth_tes 74 20 20 70 69 64 5f 74 20 6e 65 78 74 5f 70 69 t pid_t next_pi backtrace: [<000000004254801a>] kmalloc_trace+0x26/0x100 [<00000000d4db9a3d>] 0xffffffffa0008071 [<00000000c31354a5>] 0xffffffffa00086ce [<00000000c293d1ea>] do_one_initcall+0xdb/0x480 [<00000000aa189e6d>] do_init_module+0x1cf/0x680 [<00000000d513222b>] load_module+0x6a50/0x70a0 [<000000001fd4d529>] __do_sys_finit_module+0x12f/0x1c0 [<00000000b36c4c0f>] do_syscall_64+0x3f/0x90 [<00000000bbf20cf3>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lkml.kernel.org/r/20221117012346.22647-2-shangxiaojing@huawei.com Cc: Cc: Cc: Cc: stable@vger.kernel.org Fixes: 9fe41efaca08 ("tracing: Add synth event generation test module") Signed-off-by: Shang XiaoJing Signed-off-by: Steven Rostedt (Google) --- kernel/trace/synth_event_gen_test.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 0b15e975d2c2..8d77526892f4 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void) /* Now generate a gen_synth_test event */ ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("gen_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } /* @@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void) /* Now trace an empty_synth_test event */ ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals)); - out: + free: + kfree(buf); return ret; delete: /* We got an error after creating the event, delete it */ synth_event_delete("empty_synth_test"); - free: - kfree(buf); - - goto out; + goto free; } static struct synth_field_desc create_synth_test_fields[] = { From 1b5f1c34d3f5a664a57a5a7557a50e4e3cc2505c Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 09:23:46 +0800 Subject: [PATCH 410/963] tracing: Fix wild-memory-access in register_synth_event() In register_synth_event(), if set_synth_event_print_fmt() failed, then both trace_remove_event_call() and unregister_trace_event() will be called, which means the trace_event_call will call __unregister_trace_event() twice. As the result, the second unregister will causes the wild-memory-access. register_synth_event set_synth_event_print_fmt failed trace_remove_event_call event_remove if call->event.funcs then __unregister_trace_event (first call) unregister_trace_event __unregister_trace_event (second call) Fix the bug by avoiding to call the second __unregister_trace_event() by checking if the first one is called. general protection fault, probably for non-canonical address 0xfbd59c0000000024: 0000 [#1] SMP KASAN PTI KASAN: maybe wild-memory-access in range [0xdead000000000120-0xdead000000000127] CPU: 0 PID: 3807 Comm: modprobe Not tainted 6.1.0-rc1-00186-g76f33a7eedb4 #299 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:unregister_trace_event+0x6e/0x280 Code: 00 fc ff df 4c 89 ea 48 c1 ea 03 80 3c 02 00 0f 85 0e 02 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 08 4c 89 e2 48 c1 ea 03 <80> 3c 02 00 0f 85 e2 01 00 00 49 89 2c 24 48 85 ed 74 28 e8 7a 9b RSP: 0018:ffff88810413f370 EFLAGS: 00010a06 RAX: dffffc0000000000 RBX: ffff888105d050b0 RCX: 0000000000000000 RDX: 1bd5a00000000024 RSI: ffff888119e276e0 RDI: ffffffff835a8b20 RBP: dead000000000100 R08: 0000000000000000 R09: fffffbfff0913481 R10: ffffffff8489a407 R11: fffffbfff0913480 R12: dead000000000122 R13: ffff888105d050b8 R14: 0000000000000000 R15: ffff888105d05028 FS: 00007f7823e8d540(0000) GS:ffff888119e00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7823e7ebec CR3: 000000010a058002 CR4: 0000000000330ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __create_synth_event+0x1e37/0x1eb0 create_or_delete_synth_event+0x110/0x250 synth_event_run_command+0x2f/0x110 test_gen_synth_cmd+0x170/0x2eb [synth_event_gen_test] synth_event_gen_test_init+0x76/0x9bc [synth_event_gen_test] do_one_initcall+0xdb/0x480 do_init_module+0x1cf/0x680 load_module+0x6a50/0x70a0 __do_sys_finit_module+0x12f/0x1c0 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lkml.kernel.org/r/20221117012346.22647-3-shangxiaojing@huawei.com Fixes: 4b147936fa50 ("tracing: Add support for 'synthetic' events") Signed-off-by: Shang XiaoJing Cc: stable@vger.kernel.org Cc: Cc: Cc: Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e310052dc83c..29fbfb27c2b2 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -828,10 +828,9 @@ static int register_synth_event(struct synth_event *event) } ret = set_synth_event_print_fmt(call); - if (ret < 0) { + /* unregister_trace_event() will be called inside */ + if (ret < 0) trace_remove_event_call(call); - goto err; - } out: return ret; err: From 689eb2f1ba46b4b02195ac2a71c55b96d619ebf8 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:48 +0800 Subject: [PATCH 411/963] libbpf: Use page size as max_entries when probing ring buffer map Using page size as max_entries when probing ring buffer map, else the probe may fail on host with 64KB page size (e.g., an ARM64 host). After the fix, the output of "bpftool feature" on above host will be correct. Before : eBPF map_type ringbuf is NOT available eBPF map_type user_ringbuf is NOT available After : eBPF map_type ringbuf is available eBPF map_type user_ringbuf is available Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-2-houtao@huaweicloud.com --- tools/lib/bpf/libbpf_probes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index f3a8e8e74eb8..d504d96adc83 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -234,7 +234,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_USER_RINGBUF: key_size = 0; value_size = 0; - max_entries = 4096; + max_entries = sysconf(_SC_PAGE_SIZE); break; case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ From 927cbb478adf917e0a142b94baa37f06279cc466 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:49 +0800 Subject: [PATCH 412/963] libbpf: Handle size overflow for ringbuf mmap The maximum size of ringbuf is 2GB on x86-64 host, so 2 * max_entries will overflow u32 when mapping producer page and data pages. Only casting max_entries to size_t is not enough, because for 32-bits application on 64-bits kernel the size of read-only mmap region also could overflow size_t. So fixing it by casting the size of read-only mmap region into a __u64 and checking whether or not there will be overflow during mmap. Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support") Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-3-houtao@huaweicloud.com --- tools/lib/bpf/ringbuf.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index d285171d4b69..8d26684f3f00 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -77,6 +77,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, __u32 len = sizeof(info); struct epoll_event *e; struct ring *r; + __u64 mmap_sz; void *tmp; int err; @@ -115,8 +116,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, r->mask = info.max_entries - 1; /* Map writable consumer page */ - tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, - map_fd, 0); + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", @@ -129,8 +129,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, * data size to allow simple reading of samples that wrap around the * end of a ring buffer. See kernel implementation for details. * */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, - MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); + return libbpf_err(-E2BIG); + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; ringbuf_unmap_ring(rb, r); From 64176bff2446cd825b163976ee451fb6e5cd851d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:50 +0800 Subject: [PATCH 413/963] libbpf: Handle size overflow for user ringbuf mmap Similar with the overflow problem on ringbuf mmap, in user_ringbuf_map() 2 * max_entries may overflow u32 when mapping writeable region. Fixing it by casting the size of writable mmap region into a __u64 and checking whether or not there will be overflow during mmap. Fixes: b66ccae01f1d ("bpf: Add libbpf logic for user-space ring buffer") Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-4-houtao@huaweicloud.com --- tools/lib/bpf/ringbuf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 8d26684f3f00..5c4401cac1db 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -352,6 +352,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) { struct bpf_map_info info; __u32 len = sizeof(info); + __u64 mmap_sz; void *tmp; struct epoll_event *rb_epoll; int err; @@ -388,8 +389,13 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) * simple reading and writing of samples that wrap around the end of * the buffer. See the kernel implementation for details. */ - tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, - PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size); + mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; + if (mmap_sz != (__u64)(size_t)mmap_sz) { + pr_warn("user ringbuf: ring buf size (%u) is too big\n", info.max_entries); + return -E2BIG; + } + tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", From 05c1558bfcb63b95a9f530767c04c7db091560f2 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 16 Nov 2022 15:23:51 +0800 Subject: [PATCH 414/963] libbpf: Check the validity of size in user_ring_buffer__reserve() The top two bits of size are used as busy and discard flags, so reject the reservation that has any of these special bits in the size. With the addition of validity check, these is also no need to check whether or not total_size is overflowed. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221116072351.1168938-5-houtao@huaweicloud.com --- tools/lib/bpf/ringbuf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c4401cac1db..6af142953a94 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -486,6 +486,10 @@ void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size) __u64 cons_pos, prod_pos; struct ringbuf_hdr *hdr; + /* The top two bits are used as special flags */ + if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT)) + return errno = E2BIG, NULL; + /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in * the kernel. */ From e0d75267f59d7084e0468bd68beeb1bf9c71d7c0 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 18 Nov 2022 10:15:33 +0900 Subject: [PATCH 415/963] tracing: kprobe: Fix potential null-ptr-deref on trace_event_file in kprobe_event_gen_test_exit() When trace_get_event_file() failed, gen_kretprobe_test will be assigned as the error code. If module kprobe_event_gen_test is removed now, the null pointer dereference will happen in kprobe_event_gen_test_exit(). Check if gen_kprobe_test or gen_kretprobe_test is error code or NULL before dereference them. BUG: kernel NULL pointer dereference, address: 0000000000000012 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 2210 Comm: modprobe Not tainted 6.1.0-rc1-00171-g2159299a3b74-dirty #217 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:kprobe_event_gen_test_exit+0x1c/0xb5 [kprobe_event_gen_test] Code: Unable to access opcode bytes at 0xffffffff9ffffff2. RSP: 0018:ffffc900015bfeb8 EFLAGS: 00010246 RAX: ffffffffffffffea RBX: ffffffffa0002080 RCX: 0000000000000000 RDX: ffffffffa0001054 RSI: ffffffffa0001064 RDI: ffffffffdfc6349c RBP: ffffffffa0000000 R08: 0000000000000004 R09: 00000000001e95c0 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000800 R13: ffffffffa0002420 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f56b75be540(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffff9ffffff2 CR3: 000000010874a006 CR4: 0000000000330ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __x64_sys_delete_module+0x206/0x380 ? lockdep_hardirqs_on_prepare+0xd8/0x190 ? syscall_enter_from_user_mode+0x1c/0x50 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lore.kernel.org/all/20221108015130.28326-2-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Shang XiaoJing Acked-by: Masami Hiramatsu (Google) Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/kprobe_event_gen_test.c | 44 ++++++++++++++++++---------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index d81f7c51025c..1c98fafcf333 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -73,6 +73,10 @@ static struct trace_event_file *gen_kretprobe_test; #define KPROBE_GEN_TEST_ARG3 NULL #endif +static bool trace_event_file_is_valid(struct trace_event_file *input) +{ + return input && !IS_ERR(input); +} /* * Test to make sure we can create a kprobe event, then add more @@ -217,10 +221,12 @@ static int __init kprobe_event_gen_test_init(void) ret = test_gen_kretprobe_cmd(); if (ret) { - WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); - trace_put_event_file(gen_kretprobe_test); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + trace_put_event_file(gen_kretprobe_test); + } WARN_ON(kprobe_event_delete("gen_kretprobe_test")); } @@ -229,24 +235,30 @@ static int __init kprobe_event_gen_test_init(void) static void __exit kprobe_event_gen_test_exit(void) { - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, - "kprobes", - "gen_kprobe_test", false)); + if (trace_event_file_is_valid(gen_kprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kprobe_test->tr, + "kprobes", + "gen_kprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kprobe_test")); - /* Disable the event or you can't remove it */ - WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, - "kprobes", - "gen_kretprobe_test", false)); + if (trace_event_file_is_valid(gen_kretprobe_test)) { + /* Disable the event or you can't remove it */ + WARN_ON(trace_array_set_clr_event(gen_kretprobe_test->tr, + "kprobes", + "gen_kretprobe_test", false)); + + /* Now give the file and instance back */ + trace_put_event_file(gen_kretprobe_test); + } - /* Now give the file and instance back */ - trace_put_event_file(gen_kretprobe_test); /* Now unregister and free the event */ WARN_ON(kprobe_event_delete("gen_kretprobe_test")); From 22ea4ca9631eb137e64e5ab899e9c89cb6670959 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 416/963] tracing: kprobe: Fix potential null-ptr-deref on trace_array in kprobe_event_gen_test_exit() When test_gen_kprobe_cmd() failed after kprobe_event_gen_cmd_end(), it will goto delete, which will call kprobe_event_delete() and release the corresponding resource. However, the trace_array in gen_kretprobe_test will point to the invalid resource. Set gen_kretprobe_test to NULL after called kprobe_event_delete() to prevent null-ptr-deref. BUG: kernel NULL pointer dereference, address: 0000000000000070 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 246 Comm: modprobe Tainted: G W 6.1.0-rc1-00174-g9522dc5c87da-dirty #248 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 RIP: 0010:__ftrace_set_clr_event_nolock+0x53/0x1b0 Code: e8 82 26 fc ff 49 8b 1e c7 44 24 0c ea ff ff ff 49 39 de 0f 84 3c 01 00 00 c7 44 24 18 00 00 00 00 e8 61 26 fc ff 48 8b 6b 10 <44> 8b 65 70 4c 8b 6d 18 41 f7 c4 00 02 00 00 75 2f RSP: 0018:ffffc9000159fe00 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff88810971d268 RCX: 0000000000000000 RDX: ffff8881080be600 RSI: ffffffff811b48ff RDI: ffff88810971d058 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc9000159fe58 R11: 0000000000000001 R12: ffffffffa0001064 R13: ffffffffa000106c R14: ffff88810971d238 R15: 0000000000000000 FS: 00007f89eeff6540(0000) GS:ffff88813b600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000070 CR3: 000000010599e004 CR4: 0000000000330ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __ftrace_set_clr_event+0x3e/0x60 trace_array_set_clr_event+0x35/0x50 ? 0xffffffffa0000000 kprobe_event_gen_test_exit+0xcd/0x10b [kprobe_event_gen_test] __x64_sys_delete_module+0x206/0x380 ? lockdep_hardirqs_on_prepare+0xd8/0x190 ? syscall_enter_from_user_mode+0x1c/0x50 do_syscall_64+0x3f/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f89eeb061b7 Link: https://lore.kernel.org/all/20221108015130.28326-3-shangxiaojing@huawei.com/ Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module") Signed-off-by: Shang XiaoJing Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/kprobe_event_gen_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index 1c98fafcf333..c736487fc0e4 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -143,6 +143,8 @@ static int __init test_gen_kprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kprobe_test)) + gen_kprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kprobe_test"); goto out; @@ -206,6 +208,8 @@ static int __init test_gen_kretprobe_cmd(void) kfree(buf); return ret; delete: + if (trace_event_file_is_valid(gen_kretprobe_test)) + gen_kretprobe_test = NULL; /* We got an error after creating the event, delete it */ ret = kprobe_event_delete("gen_kretprobe_test"); goto out; From d1776c0202aac8251e6b4cbe096ad2950ed6c506 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 417/963] tracing/eprobe: Fix memory leak of filter string The filter string doesn't get freed when a dynamic event is deleted. If a filter is set, then memory is leaked: root@localhost:/sys/kernel/tracing# echo 'e:egroup/stat_runtime_4core \ sched/sched_stat_runtime runtime=$runtime:u32 if cpu < 4' >> dynamic_events root@localhost:/sys/kernel/tracing# echo "-:egroup/stat_runtime_4core" >> dynamic_events root@localhost:/sys/kernel/tracing# echo scan > /sys/kernel/debug/kmemleak [ 224.416373] kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) root@localhost:/sys/kernel/tracing# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff88810156f1b8 (size 8): comm "bash", pid 224, jiffies 4294935612 (age 55.800s) hex dump (first 8 bytes): 63 70 75 20 3c 20 34 00 cpu < 4. backtrace: [<000000009f880725>] __kmem_cache_alloc_node+0x18e/0x720 [<0000000042492946>] __kmalloc+0x57/0x240 [<0000000034ea7995>] __trace_eprobe_create+0x1214/0x1d30 [<00000000d70ef730>] trace_probe_create+0xf6/0x110 [<00000000915c7b16>] eprobe_dyn_event_create+0x21/0x30 [<000000000d894386>] create_dyn_event+0xf3/0x1a0 [<00000000e9af57d5>] trace_parse_run_command+0x1a9/0x2e0 [<0000000080777f18>] dyn_event_write+0x39/0x50 [<0000000089f0ec73>] vfs_write+0x311/0xe50 [<000000003da1bdda>] ksys_write+0x158/0x2a0 [<00000000bb1e616e>] __x64_sys_write+0x7c/0xc0 [<00000000e8aef1f7>] do_syscall_64+0x60/0x90 [<00000000fe7fe8ba>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Additionally, in __trace_eprobe_create() function, if an error occurs after the call to trace_eprobe_parse_filter(), which allocates the filter string, then memory is also leaked. That can be reproduced by creating the same event probe twice: root@localhost:/sys/kernel/tracing# echo 'e:egroup/stat_runtime_4core \ sched/sched_stat_runtime runtime=$runtime:u32 if cpu < 4' >> dynamic_events root@localhost:/sys/kernel/tracing# echo 'e:egroup/stat_runtime_4core \ sched/sched_stat_runtime runtime=$runtime:u32 if cpu < 4' >> dynamic_events -bash: echo: write error: File exists root@localhost:/sys/kernel/tracing# echo scan > /sys/kernel/debug/kmemleak [ 207.871584] kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) root@localhost:/sys/kernel/tracing# cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8881020d17a8 (size 8): comm "bash", pid 223, jiffies 4294938308 (age 31.000s) hex dump (first 8 bytes): 63 70 75 20 3c 20 34 00 cpu < 4. backtrace: [<000000000e4f5f31>] __kmem_cache_alloc_node+0x18e/0x720 [<0000000024f0534b>] __kmalloc+0x57/0x240 [<000000002930a28e>] __trace_eprobe_create+0x1214/0x1d30 [<0000000028387903>] trace_probe_create+0xf6/0x110 [<00000000a80d6a9f>] eprobe_dyn_event_create+0x21/0x30 [<000000007168698c>] create_dyn_event+0xf3/0x1a0 [<00000000f036bf6a>] trace_parse_run_command+0x1a9/0x2e0 [<00000000014bde8b>] dyn_event_write+0x39/0x50 [<0000000078a097f7>] vfs_write+0x311/0xe50 [<00000000996cb208>] ksys_write+0x158/0x2a0 [<00000000a3c2acb0>] __x64_sys_write+0x7c/0xc0 [<0000000006b5d698>] do_syscall_64+0x60/0x90 [<00000000780e8ecf>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fix both issues by releasing the filter string in trace_event_probe_cleanup(). Link: https://lore.kernel.org/all/20221108235738.1021467-1-rafaelmendsr@gmail.com/ Fixes: 752be5c5c910 ("tracing/eprobe: Add eprobe filter support") Signed-off-by: Rafael Mendonca Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 5dd0617e5df6..fe4833a7b7b3 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -52,6 +52,7 @@ static void trace_event_probe_cleanup(struct trace_eprobe *ep) kfree(ep->event_system); if (ep->event) trace_event_put_ref(ep->event); + kfree(ep->filter_str); kfree(ep); } From 0a1ebe35cb3b7aa1f4b26b37e2a0b9ae68dc4ffb Mon Sep 17 00:00:00 2001 From: Yi Yang Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 418/963] rethook: fix a potential memleak in rethook_alloc() In rethook_alloc(), the variable rh is not freed or passed out if handler is NULL, which could lead to a memleak, fix it. Link: https://lore.kernel.org/all/20221110104438.88099-1-yiyang13@huawei.com/ [Masami: Add "rethook:" tag to the title.] Fixes: 54ecbe6f1ed5 ("rethook: Add a generic return hook") Cc: stable@vger.kernel.org Signed-off-by: Yi Yang Acke-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/rethook.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index c69d82273ce7..32c3dfdb4d6a 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -83,8 +83,10 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler) { struct rethook *rh = kzalloc(sizeof(struct rethook), GFP_KERNEL); - if (!rh || !handler) + if (!rh || !handler) { + kfree(rh); return NULL; + } rh->data = data; rh->handler = handler; From 5dd7caf0bdc5d0bae7cf9776b4d739fb09bd5ebb Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 419/963] kprobes: Skip clearing aggrprobe's post_handler in kprobe-on-ftrace case In __unregister_kprobe_top(), if the currently unregistered probe has post_handler but other child probes of the aggrprobe do not have post_handler, the post_handler of the aggrprobe is cleared. If this is a ftrace-based probe, there is a problem. In later calls to disarm_kprobe(), we will use kprobe_ftrace_ops because post_handler is NULL. But we're armed with kprobe_ipmodify_ops. This triggers a WARN in __disarm_kprobe_ftrace() and may even cause use-after-free: Failed to disarm kprobe-ftrace at kernel_clone+0x0/0x3c0 (error -2) WARNING: CPU: 5 PID: 137 at kernel/kprobes.c:1135 __disarm_kprobe_ftrace.isra.21+0xcf/0xe0 Modules linked in: testKprobe_007(-) CPU: 5 PID: 137 Comm: rmmod Not tainted 6.1.0-rc4-dirty #18 [...] Call Trace: __disable_kprobe+0xcd/0xe0 __unregister_kprobe_top+0x12/0x150 ? mutex_lock+0xe/0x30 unregister_kprobes.part.23+0x31/0xa0 unregister_kprobe+0x32/0x40 __x64_sys_delete_module+0x15e/0x260 ? do_user_addr_fault+0x2cd/0x6b0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd [...] For the kprobe-on-ftrace case, we keep the post_handler setting to identify this aggrprobe armed with kprobe_ipmodify_ops. This way we can disarm it correctly. Link: https://lore.kernel.org/all/20221112070000.35299-1-lihuafei1@huawei.com/ Fixes: 0bc11ed5ab60 ("kprobes: Allow kprobes coexist with livepatch") Reported-by: Zhao Gongyi Suggested-by: Masami Hiramatsu (Google) Signed-off-by: Li Huafei Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/kprobes.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index cd9f5a66a690..3050631e528d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1766,7 +1766,13 @@ static int __unregister_kprobe_top(struct kprobe *p) if ((list_p != p) && (list_p->post_handler)) goto noclean; } - ap->post_handler = NULL; + /* + * For the kprobe-on-ftrace case, we keep the + * post_handler setting to identify this aggrprobe + * armed with kprobe_ipmodify_ops. + */ + if (!kprobe_ftrace(ap)) + ap->post_handler = NULL; } noclean: /* From 342a4a2f99431ee3c72ef5cfff6449ccf2abd346 Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 420/963] tracing/eprobe: Fix warning in filter creation The filter pointer (filterp) passed to create_filter() function must be a pointer that references a NULL pointer, otherwise, we get a warning when adding a filter option to the event probe: root@localhost:/sys/kernel/tracing# echo 'e:egroup/stat_runtime_4core sched/sched_stat_runtime \ runtime=$runtime:u32 if cpu < 4' >> dynamic_events [ 5034.340439] ------------[ cut here ]------------ [ 5034.341258] WARNING: CPU: 0 PID: 223 at kernel/trace/trace_events_filter.c:1939 create_filter+0x1db/0x250 [...] stripped [ 5034.345518] RIP: 0010:create_filter+0x1db/0x250 [...] stripped [ 5034.351604] Call Trace: [ 5034.351803] [ 5034.351959] ? process_preds+0x1b40/0x1b40 [ 5034.352241] ? rcu_read_lock_bh_held+0xd0/0xd0 [ 5034.352604] ? kasan_set_track+0x29/0x40 [ 5034.352904] ? kasan_save_alloc_info+0x1f/0x30 [ 5034.353264] create_event_filter+0x38/0x50 [ 5034.353573] __trace_eprobe_create+0x16f4/0x1d20 [ 5034.353964] ? eprobe_dyn_event_release+0x360/0x360 [ 5034.354363] ? mark_held_locks+0xa6/0xf0 [ 5034.354684] ? _raw_spin_unlock_irqrestore+0x35/0x60 [ 5034.355105] ? trace_hardirqs_on+0x41/0x120 [ 5034.355417] ? _raw_spin_unlock_irqrestore+0x35/0x60 [ 5034.355751] ? __create_object+0x5b7/0xcf0 [ 5034.356027] ? lock_is_held_type+0xaf/0x120 [ 5034.356362] ? rcu_read_lock_bh_held+0xb0/0xd0 [ 5034.356716] ? rcu_read_lock_bh_held+0xd0/0xd0 [ 5034.357084] ? kasan_set_track+0x29/0x40 [ 5034.357411] ? kasan_save_alloc_info+0x1f/0x30 [ 5034.357715] ? __kasan_kmalloc+0xb8/0xc0 [ 5034.357985] ? write_comp_data+0x2f/0x90 [ 5034.358302] ? __sanitizer_cov_trace_pc+0x25/0x60 [ 5034.358691] ? argv_split+0x381/0x460 [ 5034.358949] ? write_comp_data+0x2f/0x90 [ 5034.359240] ? eprobe_dyn_event_release+0x360/0x360 [ 5034.359620] trace_probe_create+0xf6/0x110 [ 5034.359940] ? trace_probe_match_command_args+0x240/0x240 [ 5034.360376] eprobe_dyn_event_create+0x21/0x30 [ 5034.360709] create_dyn_event+0xf3/0x1a0 [ 5034.360983] trace_parse_run_command+0x1a9/0x2e0 [ 5034.361297] ? dyn_event_release+0x500/0x500 [ 5034.361591] dyn_event_write+0x39/0x50 [ 5034.361851] vfs_write+0x311/0xe50 [ 5034.362091] ? dyn_event_seq_next+0x40/0x40 [ 5034.362376] ? kernel_write+0x5b0/0x5b0 [ 5034.362637] ? write_comp_data+0x2f/0x90 [ 5034.362937] ? __sanitizer_cov_trace_pc+0x25/0x60 [ 5034.363258] ? ftrace_syscall_enter+0x544/0x840 [ 5034.363563] ? write_comp_data+0x2f/0x90 [ 5034.363837] ? __sanitizer_cov_trace_pc+0x25/0x60 [ 5034.364156] ? write_comp_data+0x2f/0x90 [ 5034.364468] ? write_comp_data+0x2f/0x90 [ 5034.364770] ksys_write+0x158/0x2a0 [ 5034.365022] ? __ia32_sys_read+0xc0/0xc0 [ 5034.365344] __x64_sys_write+0x7c/0xc0 [ 5034.365669] ? syscall_enter_from_user_mode+0x53/0x70 [ 5034.366084] do_syscall_64+0x60/0x90 [ 5034.366356] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 5034.366767] RIP: 0033:0x7ff0b43938f3 [...] stripped [ 5034.371892] [ 5034.374720] ---[ end trace 0000000000000000 ]--- Link: https://lore.kernel.org/all/20221108202148.1020111-1-rafaelmendsr@gmail.com/ Fixes: 752be5c5c910 ("tracing/eprobe: Add eprobe filter support") Signed-off-by: Rafael Mendonca Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index fe4833a7b7b3..e888446d80fa 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -901,7 +901,7 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[]) { - struct event_filter *dummy; + struct event_filter *dummy = NULL; int i, ret, len = 0; char *p; From 40adaf51cb318131073d1ba8233d473cc105ecbf Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 18 Nov 2022 10:15:34 +0900 Subject: [PATCH 421/963] tracing/eprobe: Fix eprobe filter to make a filter correctly Since the eprobe filter was defined based on the eprobe's trace event itself, it doesn't work correctly. Use the original trace event of the eprobe when making the filter so that the filter works correctly. Without this fix: # echo 'e syscalls/sys_enter_openat \ flags_rename=$flags:u32 if flags < 1000' >> dynamic_events # echo 1 > events/eprobes/sys_enter_openat/enable [ 114.551550] event trace: Could not enable event sys_enter_openat -bash: echo: write error: Invalid argument With this fix: # echo 'e syscalls/sys_enter_openat \ flags_rename=$flags:u32 if flags < 1000' >> dynamic_events # echo 1 > events/eprobes/sys_enter_openat/enable # tail trace cat-241 [000] ...1. 266.498449: sys_enter_openat: (syscalls.sys_enter_openat) flags_rename=0 cat-242 [000] ...1. 266.977640: sys_enter_openat: (syscalls.sys_enter_openat) flags_rename=0 Link: https://lore.kernel.org/all/166823166395.1385292.8931770640212414483.stgit@devnote3/ Fixes: 752be5c5c910 ("tracing/eprobe: Add eprobe filter support") Reported-by: Rafael Mendonca Tested-by: Rafael Mendonca Signed-off-by: Masami Hiramatsu (Google) --- kernel/trace/trace_eprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index e888446d80fa..123d2c0a6b68 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -643,7 +643,7 @@ new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file) INIT_LIST_HEAD(&trigger->list); if (ep->filter_str) { - ret = create_event_filter(file->tr, file->event_call, + ret = create_event_filter(file->tr, ep->event, ep->filter_str, false, &filter); if (ret) goto error; From b8752064e30697e3982418f4274cc63cfc6f3027 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Fri, 18 Nov 2022 00:44:35 +0800 Subject: [PATCH 422/963] tracing: Remove unused __bad_type_size() method __bad_type_size() is unused after commit 04ae87a52074("ftrace: Rework event_create_dir()"). So, remove it. Link: https://lkml.kernel.org/r/D062EC2E-7DB7-4402-A67E-33C3577F551E@gmail.com Acked-by: Masami Hiramatsu (Google) Signed-off-by: Qiujun Huang Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_syscalls.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index b69e207012c9..942ddbdace4a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -201,8 +201,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags, return trace_handle_return(s); } -extern char *__bad_type_size(void); - #define SYSCALL_FIELD(_type, _name) { \ .type = #_type, .name = #_name, \ .size = sizeof(_type), .align = __alignof__(_type), \ From 067df9e0ad48a97382ab2179bbe773a13a846028 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Mon, 14 Nov 2022 18:46:32 +0800 Subject: [PATCH 423/963] tracing: Fix potential null-pointer-access of entry in list 'tr->err_log' Entries in list 'tr->err_log' will be reused after entry number exceed TRACING_LOG_ERRS_MAX. The cmd string of the to be reused entry will be freed first then allocated a new one. If the allocation failed, then the entry will still be in list 'tr->err_log' but its 'cmd' field is set to be NULL, later access of 'cmd' is risky. Currently above problem can cause the loss of 'cmd' information of first entry in 'tr->err_log'. When execute `cat /sys/kernel/tracing/error_log`, reproduce logs like: [ 37.495100] trace_kprobe: error: Maxactive is not for kprobe(null) ^ [ 38.412517] trace_kprobe: error: Maxactive is not for kprobe Command: p4:myprobe2 do_sys_openat2 ^ Link: https://lore.kernel.org/linux-trace-kernel/20221114104632.3547266-1-zhengyejian1@huawei.com Fixes: 1581a884b7ca ("tracing: Remove size restriction on tracing_log_err cmd strings") Signed-off-by: Zheng Yejian Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5bd202d6d79a..a7fe0e115272 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7803,6 +7803,7 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, int len) { struct tracing_log_err *err; + char *cmd; if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) { err = alloc_tracing_log_err(len); @@ -7811,12 +7812,12 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr, return err; } - + cmd = kzalloc(len, GFP_KERNEL); + if (!cmd) + return ERR_PTR(-ENOMEM); err = list_first_entry(&tr->err_log, struct tracing_log_err, list); kfree(err->cmd); - err->cmd = kzalloc(len, GFP_KERNEL); - if (!err->cmd) - return ERR_PTR(-ENOMEM); + err->cmd = cmd; list_del(&err->list); return err; From 22b29557aef3c9d673c887911b504c6d47009de4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 15 Nov 2022 14:10:44 -0800 Subject: [PATCH 424/963] selftests: mptcp: gives slow test-case more time On slow or busy VM, some test-cases still fail because the data transfer completes before the endpoint manipulation actually took effect. Address the issue by artificially increasing the runtime for the relevant test-cases. Fixes: ef360019db40 ("selftests: mptcp: signal addresses testcases") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/309 Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f3dd5f2a0272..2eeaf4aca644 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2152,7 +2152,7 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2165,7 +2165,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2178,7 +2178,7 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow + run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert From 7e68d31020f18f8d695d5f143fc16cdaa96166cb Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 15 Nov 2022 14:10:45 -0800 Subject: [PATCH 425/963] selftests: mptcp: run mptcp_sockopt from a new netns Not running it from a new netns causes issues if some MPTCP settings are modified, e.g. if MPTCP is disabled from the sysctl knob, if multiple addresses are available and added to the MPTCP path-manager, etc. In these cases, the created connection will not behave as expected, e.g. unable to create an MPTCP socket, more than one subflow is seen, etc. A new "sandbox" net namespace is now created and used to run mptcp_sockopt from this controlled environment. Fixes: ce9979129a0b ("selftests: mptcp: add mptcp getsockopt test cases") Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 0879da915014..80d36f7cfee8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -35,8 +35,9 @@ init() ns1="ns1-$rndh" ns2="ns2-$rndh" + ns_sbox="ns_sbox-$rndh" - for netns in "$ns1" "$ns2";do + for netns in "$ns1" "$ns2" "$ns_sbox";do ip netns add $netns || exit $ksft_skip ip -net $netns link set lo up ip netns exec $netns sysctl -q net.mptcp.enabled=1 @@ -73,7 +74,7 @@ init() cleanup() { - for netns in "$ns1" "$ns2"; do + for netns in "$ns1" "$ns2" "$ns_sbox"; do ip netns del $netns done rm -f "$cin" "$cout" @@ -243,7 +244,7 @@ do_mptcp_sockopt_tests() { local lret=0 - ./mptcp_sockopt + ip netns exec "$ns_sbox" ./mptcp_sockopt lret=$? if [ $lret -ne 0 ]; then @@ -252,7 +253,7 @@ do_mptcp_sockopt_tests() return fi - ./mptcp_sockopt -6 + ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? if [ $lret -ne 0 ]; then From 3de88b95c4d436d78afc0266a0bed76c35ddeb62 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 15 Nov 2022 14:10:46 -0800 Subject: [PATCH 426/963] selftests: mptcp: fix mibit vs mbit mix up The estimated time was supposing the rate was expressed in mibit (bit * 1024^2) but it is in mbit (bit * 1000^2). This makes the threshold higher but in a more realistic way to avoid false positives reported by CI instances. Before this patch, the thresholds were at 7561/4005ms and now they are at 7906/4178ms. While at it, also fix a typo in the linked comment, spotted by Mat. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/310 Fixes: 1a418cb8e888 ("mptcp: simult flow self-tests") Suggested-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/simult_flows.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ffa13a957a36..40aeb5a71a2a 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -247,9 +247,10 @@ run_test() tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 - # time is measured in ms, account for transfer size, affegated link speed + # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) - local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) )) + # ms byte -> bit 10% mbit -> kbit -> bit 10% + local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) )) # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack From c2418f911a31a266af4fbaca998dc73d3676475a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 20 Oct 2022 15:23:40 +0100 Subject: [PATCH 427/963] gpu: host1x: Avoid trying to use GART on Tegra20 Since commit c7e3ca515e78 ("iommu/tegra: gart: Do not register with bus") quite some time ago, the GART driver has effectively disabled itself to avoid issues with the GPU driver expecting it to work in ways that it doesn't. As of commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") that bodge no longer works, but really the GPU driver should be responsible for its own behaviour anyway. Make the workaround explicit. Reported-by: Jon Hunter Suggested-by: Dmitry Osipenko Signed-off-by: Robin Murphy Tested-by: Jon Hunter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/drm.c | 4 ++++ drivers/gpu/host1x/dev.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 6748ec1e0005..a1f909dac89a 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1093,6 +1093,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) struct host1x *host1x = dev_get_drvdata(dev->dev.parent); struct iommu_domain *domain; + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are * likely to be allocated beyond the 32-bit boundary if sufficient diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 0cd3f97e7e49..f60ea24db0ec 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -292,6 +292,10 @@ static void host1x_setup_virtualization_tables(struct host1x *host) static bool host1x_wants_iommu(struct host1x *host1x) { + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If we support addressing a maximum of 32 bits of physical memory * and if the host1x firewall is enabled, there's no need to enable From 489d144563f23911262a652234b80c70c89c978b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Thu, 17 Nov 2022 14:42:09 +0000 Subject: [PATCH 428/963] mmc: core: Fix ambiguous TRIM and DISCARD arg Clean up the MMC_TRIM_ARGS define that became ambiguous with DISCARD introduction. While at it, let's fix one usage where MMC_TRIM_ARGS falsely included DISCARD too. Fixes: b3bf915308ca ("mmc: core: new discard feature support at eMMC v4.5") Signed-off-by: Christian Loehle Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/11376b5714964345908f3990f17e0701@hyperstone.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 9 +++++++-- include/linux/mmc/mmc.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) From 733d4bbf9514890eb53ebe75827bf1fb4fd25ebe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 15 Nov 2022 19:34:39 +0200 Subject: [PATCH 429/963] net: liquidio: simplify if expression Fix the warning reported by kbuild: cocci warnings: (new ones prefixed by >>) >> drivers/net/ethernet/cavium/liquidio/lio_main.c:1797:54-56: WARNING !A || A && B is equivalent to !A || B drivers/net/ethernet/cavium/liquidio/lio_main.c:1827:54-56: WARNING !A || A && B is equivalent to !A || B Fixes: 8979f428a4af ("net: liquidio: release resources when liquidio driver open failed") Reported-by: kernel test robot Signed-off-by: Leon Romanovsky Reviewed-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 75771825c3f9..98793b2ac2c7 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1794,7 +1794,7 @@ static int liquidio_open(struct net_device *netdev) ifstate_set(lio, LIO_IFSTATE_RUNNING); - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) { + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) { ret = setup_tx_poll_fn(netdev); if (ret) goto err_poll; @@ -1824,7 +1824,7 @@ static int liquidio_open(struct net_device *netdev) return 0; err_rx_ctrl: - if (!OCTEON_CN23XX_PF(oct) || (OCTEON_CN23XX_PF(oct) && !oct->msix_on)) + if (!OCTEON_CN23XX_PF(oct) || !oct->msix_on) cleanup_tx_poll_fn(netdev); err_poll: if (lio->ptp_clock) { From 8207f253a097fe15c93d85ac15ebb73c5e39e1e1 Mon Sep 17 00:00:00 2001 From: Thomas Zeitlhofer Date: Tue, 15 Nov 2022 23:09:41 +0100 Subject: [PATCH 430/963] net: neigh: decrement the family specific qlen Commit 0ff4eb3d5ebb ("neighbour: make proxy_queue.qlen limit per-device") introduced the length counter qlen in struct neigh_parms. There are separate neigh_parms instances for IPv4/ARP and IPv6/ND, and while the family specific qlen is incremented in pneigh_enqueue(), the mentioned commit decrements always the IPv4/ARP specific qlen, regardless of the currently processed family, in pneigh_queue_purge() and neigh_proxy_process(). As a result, with IPv6/ND, the family specific qlen is only incremented (and never decremented) until it exceeds PROXY_QLEN, and then, according to the check in pneigh_enqueue(), neighbor solicitations are not answered anymore. As an example, this is noted when using the subnet-router anycast address to access a Linux router. After a certain amount of time (in the observed case, qlen exceeded PROXY_QLEN after two days), the Linux router stops answering neighbor solicitations for its subnet-router anycast address and effectively becomes unreachable. Another result with IPv6/ND is that the IPv4/ARP specific qlen is decremented more often than incremented. This leads to negative qlen values, as a signed integer has been used for the length counter qlen, and potentially to an integer overflow. Fix this by introducing the helper function neigh_parms_qlen_dec(), which decrements the family specific qlen. Thereby, make use of the existing helper function neigh_get_dev_parms_rcu(), whose definition therefore needs to be placed earlier in neighbour.c. Take the family member from struct neigh_table to determine the currently processed family and appropriately call neigh_parms_qlen_dec() from pneigh_queue_purge() and neigh_proxy_process(). Additionally, use an unsigned integer for the length counter qlen. Fixes: 0ff4eb3d5ebb ("neighbour: make proxy_queue.qlen limit per-device") Signed-off-by: Thomas Zeitlhofer Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- net/core/neighbour.c | 58 +++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 20745cf7ae1a..2f2a6023fb0e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -83,7 +83,7 @@ struct neigh_parms { struct rcu_head rcu_head; int reachable_time; - int qlen; + u32 qlen; int data[NEIGH_VAR_DATA_MAX]; DECLARE_BITMAP(data_state, NEIGH_VAR_DATA_MAX); }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a77a85e357e0..952a54763358 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -307,7 +307,31 @@ static int neigh_del_timer(struct neighbour *n) return 0; } -static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) +static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, + int family) +{ + switch (family) { + case AF_INET: + return __in_dev_arp_parms_get_rcu(dev); + case AF_INET6: + return __in6_dev_nd_parms_get_rcu(dev); + } + return NULL; +} + +static void neigh_parms_qlen_dec(struct net_device *dev, int family) +{ + struct neigh_parms *p; + + rcu_read_lock(); + p = neigh_get_dev_parms_rcu(dev, family); + if (p) + p->qlen--; + rcu_read_unlock(); +} + +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net, + int family) { struct sk_buff_head tmp; unsigned long flags; @@ -321,13 +345,7 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) struct net_device *dev = skb->dev; if (net == NULL || net_eq(dev_net(dev), net)) { - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, family); __skb_unlink(skb, list); __skb_queue_tail(&tmp, skb); } @@ -409,7 +427,8 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev, skip_perm); pneigh_ifdown_and_unlock(tbl, dev); - pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL); + pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL, + tbl->family); if (skb_queue_empty_lockless(&tbl->proxy_queue)) del_timer_sync(&tbl->proxy_timer); return 0; @@ -1621,13 +1640,8 @@ static void neigh_proxy_process(struct timer_list *t) if (tdif <= 0) { struct net_device *dev = skb->dev; - struct in_device *in_dev; - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dev); - if (in_dev) - in_dev->arp_parms->qlen--; - rcu_read_unlock(); + neigh_parms_qlen_dec(dev, tbl->family); __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) { @@ -1821,7 +1835,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue, NULL); + pneigh_queue_purge(&tbl->proxy_queue, NULL, tbl->family); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n"); @@ -3539,18 +3553,6 @@ static int proc_unres_qlen(struct ctl_table *ctl, int write, return ret; } -static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev, - int family) -{ - switch (family) { - case AF_INET: - return __in_dev_arp_parms_get_rcu(dev); - case AF_INET6: - return __in6_dev_nd_parms_get_rcu(dev); - } - return NULL; -} - static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p, int index) { From 40b9d1ab63f5c4f3cb69450044d07b45e5af72e1 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 15 Nov 2022 17:19:14 -0800 Subject: [PATCH 431/963] ipvlan: hold lower dev to avoid possible use-after-free Recently syzkaller discovered the issue of disappearing lower device (NETDEV_UNREGISTER) while the virtual device (like macvlan) is still having it as a lower device. So it's just a matter of time similar discovery will be made for IPvlan device setup. So fixing it preemptively. Also while at it, add a refcount tracker. Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan.h | 1 + drivers/net/ipvlan/ipvlan_main.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index de94921cbef9..025e0c19ec25 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -98,6 +98,7 @@ struct ipvl_port { struct sk_buff_head backlog; int count; struct ida ida; + netdevice_tracker dev_tracker; }; struct ipvl_skb_cb { diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 54c94a69c2bb..796a38f9d7b2 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -83,6 +83,7 @@ static int ipvlan_port_create(struct net_device *dev) if (err) goto err; + netdev_hold(dev, &port->dev_tracker, GFP_KERNEL); return 0; err: @@ -95,6 +96,7 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); struct sk_buff *skb; + netdev_put(dev, &port->dev_tracker); if (port->mode == IPVLAN_MODE_L3S) ipvlan_l3s_unregister(port); netdev_rx_handler_unregister(dev); From f5f8ad3fcdc49e4d794973007525ed864f93f3fb Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 17 Nov 2022 17:21:20 -0600 Subject: [PATCH 432/963] ASoC: SOF: dai: move AMD_HS to end of list to restore backwards-compatibility The addition of AMD_HS breaks Mediatek platforms by using an index previously allocated to Mediatek. This is a backwards-compatibility issue and needs to be fixed. All firmware released by AMD needs to be re-generated and re-distributed. Fixes: ed2562c64b4f ("ASoC: SOF: Adding amd HS functionality to the sof core") Link: https://github.com/thesofproject/sof/issues/6615 Link: https://lore.kernel.org/alsa-devel/36a45c7a-820a-7675-d740-c0e83ae2c417@collabora.com/ Reported-by: AngeloGioacchino Del Regno Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Daniel Baluta Reviewed-by: Basavaraj Hiregoudar Reviewed-by: V sujith kumar Reddy Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221117232120.112639-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof/dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h index 83fd81c82e4c..9fbd3832bcdc 100644 --- a/include/sound/sof/dai.h +++ b/include/sound/sof/dai.h @@ -84,8 +84,8 @@ enum sof_ipc_dai_type { SOF_DAI_AMD_BT, /**< AMD ACP BT*/ SOF_DAI_AMD_SP, /**< AMD ACP SP */ SOF_DAI_AMD_DMIC, /**< AMD ACP DMIC */ - SOF_DAI_AMD_HS, /**< Amd HS */ SOF_DAI_MEDIATEK_AFE, /**< Mediatek AFE */ + SOF_DAI_AMD_HS, /**< Amd HS */ }; /* general purpose DAI configuration */ From e85e9e0d8cb759013d6474011c227f92e442d746 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 16 Nov 2022 17:49:30 +0100 Subject: [PATCH 433/963] spi: spi-imx: spi_imx_transfer_one(): check for DMA transfer first The SPI framework checks for each transfer (with the struct spi_controller::can_dma callback) whether the driver wants to use DMA for the transfer. If the driver returns true, the SPI framework will map the transfer's data to the device, start the actual transfer and map the data back. In commit 07e759387788 ("spi: spi-imx: add PIO polling support") the spi-imx driver's spi_imx_transfer_one() function was extended. If the estimated duration of a transfer does not exceed a configurable duration, a polling transfer function is used. This check happens before checking if the driver decided earlier for a DMA transfer. If spi_imx_can_dma() decided to use a DMA transfer, and the user configured a big maximum polling duration, a polling transfer will be used. The DMA unmap after the transfer destroys the transferred data. To fix this problem check in spi_imx_transfer_one() if the driver decided for DMA transfer first, then check the limits for a polling transfer. Fixes: 07e759387788 ("spi: spi-imx: add PIO polling support") Link: https://lore.kernel.org/all/20221111003032.82371-1-festevam@gmail.com Reported-by: Frieder Schrempf Reported-by: Fabio Estevam Tested-by: Fabio Estevam Cc: David Jander Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Tested-by: Frieder Schrempf Reviewed-by: Frieder Schrempf Link: https://lore.kernel.org/r/20221116164930.855362-1-mkl@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 468ce0a2b282..d209930069cf 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1606,6 +1606,13 @@ static int spi_imx_transfer_one(struct spi_controller *controller, if (spi_imx->slave_mode) return spi_imx_pio_transfer_slave(spi, transfer); + /* + * If we decided in spi_imx_can_dma() that we want to do a DMA + * transfer, the SPI transfer has already been mapped, so we + * have to do the DMA transfer here. + */ + if (spi_imx->usedma) + return spi_imx_dma_transfer(spi_imx, transfer); /* * Calculate the estimated time in us the transfer runs. Find * the number of Hz per byte per polling limit. @@ -1617,9 +1624,6 @@ static int spi_imx_transfer_one(struct spi_controller *controller, if (transfer->len < byte_limit) return spi_imx_poll_transfer(spi, transfer); - if (spi_imx->usedma) - return spi_imx_dma_transfer(spi_imx, transfer); - return spi_imx_pio_transfer(spi, transfer); } From 302e57f809be8b678d1ab0b2634504d5d51a166d Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Thu, 17 Nov 2022 10:45:03 +0800 Subject: [PATCH 434/963] selftests/net: fix missing xdp_dummy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit afef88e65554 ("selftests/bpf: Store BPF object files with .bpf.o extension"), we should use xdp_dummy.bpf.o instade of xdp_dummy.o. In addition, use the BPF_FILE variable to save the BPF object file name, which can be better identified and modified. Fixes: afef88e65554 ("selftests/bpf: Store BPF object files with .bpf.o extension") Signed-off-by: Wang Yufen Cc: Daniel Müller Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro.sh | 8 +++++--- tools/testing/selftests/net/udpgro_bench.sh | 8 +++++--- tools/testing/selftests/net/udpgro_frglist.sh | 8 +++++--- tools/testing/selftests/net/udpgro_fwd.sh | 3 ++- tools/testing/selftests/net/veth.sh | 11 ++++++----- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 6a443ca3cd3a..0c743752669a 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + # set global exit status, but never reset nonzero one. check_err() { @@ -34,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp } run_one() { @@ -195,8 +197,8 @@ run_all() { return $ret } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 8a1109a545db..894972877e8b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -34,7 +36,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & @@ -80,8 +82,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 7fe85ba51075..c9c4b9d65839 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -5,6 +5,8 @@ readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +BPF_FILE="../bpf/xdp_dummy.bpf.o" + cleanup() { local -r jobs="$(jobs -p)" local -r ns="$(ip netns list|grep $PEER_NS)" @@ -36,7 +38,7 @@ run_one() { ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp + ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action @@ -81,8 +83,8 @@ run_all() { run_udp "${ipv6_args}" } -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 1bcd82e1f662..c079565add39 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,6 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -46,7 +47,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 430895d1a2b6..2d073595c620 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,6 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +BPF_FILE="../bpf/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -216,8 +217,8 @@ while getopts "hs:" option; do esac done -if [ ! -f ../bpf/xdp_dummy.o ]; then - echo "Missing xdp_dummy helper. Build bpf selftest first" +if [ ! -f ${BPF_FILE} ]; then + echo "Missing ${BPF_FILE}. Build bpf selftest first" exit 1 fi @@ -288,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 - ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ + ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \ section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ @@ -311,7 +312,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off From 3bcd6c7eaa53b56c3f584da46a1f7652e759d0e5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Nov 2022 14:02:28 +0000 Subject: [PATCH 435/963] rxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975] After rxrpc_unbundle_conn() has removed a connection from a bundle, it checks to see if there are any conns with available channels and, if not, removes and attempts to destroy the bundle. Whilst it does check after grabbing client_bundles_lock that there are no connections attached, this races with rxrpc_look_up_bundle() retrieving the bundle, but not attaching a connection for the connection to be attached later. There is therefore a window in which the bundle can get destroyed before we manage to attach a new connection to it. Fix this by adding an "active" counter to struct rxrpc_bundle: (1) rxrpc_connect_call() obtains an active count by prepping/looking up a bundle and ditches it before returning. (2) If, during rxrpc_connect_call(), a connection is added to the bundle, this obtains an active count, which is held until the connection is discarded. (3) rxrpc_deactivate_bundle() is created to drop an active count on a bundle and destroy it when the active count reaches 0. The active count is checked inside client_bundles_lock() to prevent a race with rxrpc_look_up_bundle(). (4) rxrpc_unbundle_conn() then calls rxrpc_deactivate_bundle(). Fixes: 245500d853e9 ("rxrpc: Rewrite the client connection manager") Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-15975 Signed-off-by: David Howells Tested-by: zdi-disclosures@trendmicro.com cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: David S. Miller --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/conn_client.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1ad0ec5afb50..8499ceb7719c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -399,6 +399,7 @@ enum rxrpc_conn_proto_state { struct rxrpc_bundle { struct rxrpc_conn_parameters params; refcount_t ref; + atomic_t active; /* Number of active users */ unsigned int debug_id; bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3c9eeb5b750c..bdb335cb2d05 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -40,6 +40,8 @@ __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ; DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The @@ -123,6 +125,7 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle->params = *cp; rxrpc_get_peer(bundle->params.peer); refcount_set(&bundle->ref, 1); + atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); } @@ -149,7 +152,7 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle) dead = __refcount_dec_and_test(&bundle->ref, &r); - _debug("PUT B=%x %d", d, r); + _debug("PUT B=%x %d", d, r - 1); if (dead) rxrpc_free_bundle(bundle); } @@ -338,6 +341,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); + atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); return bundle; @@ -435,6 +439,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (old) trace_rxrpc_client(old, -1, rxrpc_client_replace); candidate->bundle_shift = shift; + atomic_inc(&bundle->active); bundle->conns[i] = candidate; for (j = 0; j < RXRPC_MAXCALLS; j++) set_bit(shift + j, &bundle->avail_chans); @@ -725,6 +730,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, smp_rmb(); out_put_bundle: + rxrpc_deactivate_bundle(bundle); rxrpc_put_bundle(bundle); out: _leave(" = %d", ret); @@ -900,9 +906,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) { struct rxrpc_bundle *bundle = conn->bundle; - struct rxrpc_local *local = bundle->params.local; unsigned int bindex; - bool need_drop = false, need_put = false; + bool need_drop = false; int i; _enter("C=%x", conn->debug_id); @@ -921,15 +926,22 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) } spin_unlock(&bundle->channel_lock); - /* If there are no more connections, remove the bundle */ - if (!bundle->avail_chans) { - _debug("maybe unbundle"); - spin_lock(&local->client_bundles_lock); + if (need_drop) { + rxrpc_deactivate_bundle(bundle); + rxrpc_put_connection(conn); + } +} - for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) - if (bundle->conns[i]) - break; - if (i == ARRAY_SIZE(bundle->conns) && !bundle->params.exclusive) { +/* + * Drop the active count on a bundle. + */ +static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) +{ + struct rxrpc_local *local = bundle->params.local; + bool need_put = false; + + if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { + if (!bundle->params.exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -939,10 +951,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_put) rxrpc_put_bundle(bundle); } - - if (need_drop) - rxrpc_put_connection(conn); - _leave(""); } /* From 24deec6b9e4a051635f75777844ffc184644fec9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 16 Nov 2022 12:06:53 +0200 Subject: [PATCH 436/963] net: dsa: sja1105: disallow C45 transactions on the BASE-TX MDIO bus You'd think people know that the internal 100BASE-TX PHY on the SJA1110 responds only to clause 22 MDIO transactions, but they don't :) When a clause 45 transaction is attempted, sja1105_base_tx_mdio_read() and sja1105_base_tx_mdio_write() don't expect "reg" to contain bit 30 set (MII_ADDR_C45) and pack this value into the SPI transaction buffer. But the field in the SPI buffer has a width smaller than 30 bits, so we see this confusing message from the packing() API rather than a proper rejection of C45 transactions: Call trace: dump_stack+0x1c/0x38 sja1105_pack+0xbc/0xc0 [sja1105] sja1105_xfer+0x114/0x2b0 [sja1105] sja1105_xfer_u32+0x44/0xf4 [sja1105] sja1105_base_tx_mdio_read+0x44/0x7c [sja1105] mdiobus_read+0x44/0x80 get_phy_c45_ids+0x70/0x234 get_phy_device+0x68/0x15c fwnode_mdiobus_register_phy+0x74/0x240 of_mdiobus_register+0x13c/0x380 sja1105_mdiobus_register+0x368/0x490 [sja1105] sja1105_setup+0x94/0x119c [sja1105] Cannot store 401d2405 inside bits 24-4 (would truncate) Fixes: 5a8f09748ee7 ("net: dsa: sja1105: register the MDIO buses for 100base-T1 and 100base-TX") Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_mdio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 215dd17ca790..4059fcc8c832 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -256,6 +256,9 @@ static int sja1105_base_tx_mdio_read(struct mii_bus *bus, int phy, int reg) u32 tmp; int rc; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + rc = sja1105_xfer_u32(priv, SPI_READ, regs->mdio_100base_tx + reg, &tmp, NULL); if (rc < 0) @@ -272,6 +275,9 @@ static int sja1105_base_tx_mdio_write(struct mii_bus *bus, int phy, int reg, const struct sja1105_regs *regs = priv->info->regs; u32 tmp = val; + if (reg & MII_ADDR_C45) + return -EOPNOTSUPP; + return sja1105_xfer_u32(priv, SPI_WRITE, regs->mdio_100base_tx + reg, &tmp, NULL); } From 0ad6bded175e829c2ca261529c9dce39a32a042d Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 16 Nov 2022 21:02:49 +0800 Subject: [PATCH 437/963] nfc/nci: fix race with opening and closing Previously we leverage NCI_UNREG and the lock inside nci_close_device to prevent the race condition between opening a device and closing a device. However, it still has problem because a failed opening command will erase the NCI_UNREG flag and allow another opening command to bypass the status checking. This fix corrects that by making sure the NCI_UNREG is held. Reported-by: syzbot+43475bf3cfbd6e41f5b7@syzkaller.appspotmail.com Fixes: 48b71a9e66c2 ("NFC: add NCI_UNREG flag to eliminate the race") Signed-off-by: Lin Ma Signed-off-by: David S. Miller --- net/nfc/nci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6a193cce2a75..4ffdf2f45c44 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -542,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev) skb_queue_purge(&ndev->tx_q); ndev->ops->close(ndev); - ndev->flags = 0; + ndev->flags &= BIT(NCI_UNREG); } done: From 2360f9b8c4e81d242d4cbf99d630a2fffa681fab Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 17 Nov 2022 14:55:27 +0800 Subject: [PATCH 438/963] net: pch_gbe: fix potential memleak in pch_gbe_tx_queue() In pch_gbe_xmit_frame(), NETDEV_TX_OK will be returned whether pch_gbe_tx_queue() sends data successfully or not, so pch_gbe_tx_queue() needs to free skb before returning. But pch_gbe_tx_queue() returns without freeing skb in case of dma_map_single() fails. Add dev_kfree_skb_any() to fix it. Fixes: 77555ee72282 ("net: Add Gigabit Ethernet driver of Topcliff PCH") Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 3f2c30184752..c9ae47128a07 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1143,6 +1143,7 @@ static void pch_gbe_tx_queue(struct pch_gbe_adapter *adapter, buffer_info->dma = 0; buffer_info->time_stamp = 0; tx_ring->next_to_use = ring_num; + dev_kfree_skb_any(skb); return; } buffer_info->mapped = true; From 11c10956515b8ec44cf4f2a7b9d8bf8b9dc05ec4 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 10 Nov 2022 20:26:06 +0800 Subject: [PATCH 439/963] 9p/fd: fix issue of list_del corruption in p9_fd_cancel() Syz reported the following issue: kernel BUG at lib/list_debug.c:53! invalid opcode: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:__list_del_entry_valid.cold+0x5c/0x72 Call Trace: p9_fd_cancel+0xb1/0x270 p9_client_rpc+0x8ea/0xba0 p9_client_create+0x9c0/0xed0 v9fs_session_init+0x1e0/0x1620 v9fs_mount+0xba/0xb80 legacy_get_tree+0x103/0x200 vfs_get_tree+0x89/0x2d0 path_mount+0x4c0/0x1ac0 __x64_sys_mount+0x33b/0x430 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 The process is as follows: Thread A: Thread B: p9_poll_workfn() p9_client_create() ... ... p9_conn_cancel() p9_fd_cancel() list_del() ... ... list_del() //list_del corruption There is no lock protection when deleting list in p9_conn_cancel(). After deleting list in Thread A, thread B will delete the same list again. It will cause issue of list_del corruption. Setting req->status to REQ_STATUS_ERROR under lock prevents other cleanup paths from trying to manipulate req_list. The other thread can safely check req->status because it still holds a reference to req at this point. Link: https://lkml.kernel.org/r/20221110122606.383352-1-shaozhengchao@huawei.com Fixes: 52f1c45dde91 ("9p: trans_fd/p9_conn_cancel: drop client lock earlier") Reported-by: syzbot+9b69b8d10ab4a7d88056@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao [Dominique: add description of the fix in commit message] Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 56a186768750..bd28e63d7666 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -202,9 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); + req->status = REQ_STATUS_ERROR; } spin_unlock(&m->req_lock); From 578b565b240afdfe0596d183f473f333eb9d3008 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Thu, 17 Nov 2022 17:11:57 +0800 Subject: [PATCH 440/963] 9p/fd: Fix write overflow in p9_read_work This error was reported while fuzzing: BUG: KASAN: slab-out-of-bounds in _copy_to_iter+0xd35/0x1190 Write of size 4043 at addr ffff888008724eb1 by task kworker/1:1/24 CPU: 1 PID: 24 Comm: kworker/1:1 Not tainted 6.1.0-rc5-00002-g1adf73218daa-dirty #223 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014 Workqueue: events p9_read_work Call Trace: dump_stack_lvl+0x4c/0x64 print_report+0x178/0x4b0 kasan_report+0xae/0x130 kasan_check_range+0x179/0x1e0 memcpy+0x38/0x60 _copy_to_iter+0xd35/0x1190 copy_page_to_iter+0x1d5/0xb00 pipe_read+0x3a1/0xd90 __kernel_read+0x2a5/0x760 kernel_read+0x47/0x60 p9_read_work+0x463/0x780 process_one_work+0x91d/0x1300 worker_thread+0x8c/0x1210 kthread+0x280/0x330 ret_from_fork+0x22/0x30 Allocated by task 457: kasan_save_stack+0x1c/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x7e/0x90 __kmalloc+0x59/0x140 p9_fcall_init.isra.11+0x5d/0x1c0 p9_tag_alloc+0x251/0x550 p9_client_prepare_req+0x162/0x350 p9_client_rpc+0x18d/0xa90 p9_client_create+0x670/0x14e0 v9fs_session_init+0x1fd/0x14f0 v9fs_mount+0xd7/0xaf0 legacy_get_tree+0xf3/0x1f0 vfs_get_tree+0x86/0x2c0 path_mount+0x885/0x1940 do_mount+0xec/0x100 __x64_sys_mount+0x1a0/0x1e0 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd This BUG pops up when trying to reproduce https://syzkaller.appspot.com/bug?id=6c7cd46c7bdd0e86f95d26ec3153208ad186f9fa The callstack is different but the issue is valid and re-producable with the same re-producer in the link. The root cause of this issue is that we check the size of the message received against the msize of the client in p9_read_work. However, it turns out that capacity is no longer consistent with msize. Thus, the message size should be checked against sdata capacity. As the msize is non-consistant with the capacity of the tag and as we are now checking message size against capacity directly, there is no point checking message size against msize. So remove it. Link: https://lkml.kernel.org/r/20221117091159.31533-2-guozihua@huawei.com Link: https://lkml.kernel.org/r/20221117091159.31533-3-guozihua@huawei.com Reported-by: syzbot+0f89bd13eaceccc0e126@syzkaller.appspotmail.com Fixes: 60ece0833b6c ("net/9p: allocate appropriate reduced message buffers") Signed-off-by: GUO Zihua Reviewed-by: Christian Schoenebeck [Dominique: squash patches 1 & 2 and fix size including header part] Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index bd28e63d7666..27c5c938ccd1 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -324,14 +324,6 @@ static void p9_read_work(struct work_struct *work) goto error; } - if (m->rc.size >= m->client->msize) { - p9_debug(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", - m->rc.size); - err = -EIO; - goto error; - } - p9_debug(P9_DEBUG_TRANS, "mux %p pkt: size: %d bytes tag: %d\n", m, m->rc.size, m->rc.tag); @@ -344,6 +336,14 @@ static void p9_read_work(struct work_struct *work) goto error; } + if (m->rc.size > m->rreq->rc.capacity) { + p9_debug(P9_DEBUG_ERROR, + "requested packet size too big: %d for tag %d with capacity %zd\n", + m->rc.size, m->rc.tag, m->rreq->rc.capacity); + err = -EIO; + goto error; + } + if (!m->rreq->rc.sdata) { p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", From 6854fadbeee10891ed74246bdc05031906b6c8cf Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Thu, 17 Nov 2022 17:11:59 +0800 Subject: [PATCH 441/963] 9p/fd: Use P9_HDRSZ for header size Cleanup hardcoded header sizes to use P9_HDRSZ instead of '7' Link: https://lkml.kernel.org/r/20221117091159.31533-4-guozihua@huawei.com Signed-off-by: GUO Zihua Reviewed-by: Christian Schoenebeck [Dominique: commit message adjusted to make sense after offset size adjustment got removed] Signed-off-by: Dominique Martinet --- net/9p/trans_fd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 27c5c938ccd1..eeea0a6a75b6 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -120,7 +120,7 @@ struct p9_conn { struct list_head unsent_req_list; struct p9_req_t *rreq; struct p9_req_t *wreq; - char tmp_buf[7]; + char tmp_buf[P9_HDRSZ]; struct p9_fcall rc; int wpos; int wsize; @@ -293,7 +293,7 @@ static void p9_read_work(struct work_struct *work) if (!m->rc.sdata) { m->rc.sdata = m->tmp_buf; m->rc.offset = 0; - m->rc.capacity = 7; /* start by reading header */ + m->rc.capacity = P9_HDRSZ; /* start by reading header */ } clear_bit(Rpending, &m->wsched); @@ -316,7 +316,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new header\n"); /* Header size */ - m->rc.size = 7; + m->rc.size = P9_HDRSZ; err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); if (err) { p9_debug(P9_DEBUG_ERROR, From 0b24dfa587c6cc7484cfb170da5c7dd73451f670 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Fri, 18 Nov 2022 14:10:35 +0100 Subject: [PATCH 442/963] regulator: slg51000: Wait after asserting CS pin Sony's downstream driver [1], among some other changes, adds a seemingly random 10ms usleep_range, which turned out to be necessary for the hardware to function properly on at least Sony Xperia 1 IV. Without this, I2C transactions with the SLG51000 straight up fail. Relax (10-10ms -> 10-11ms) and add the aforementioned sleep to make sure the hardware has some time to wake up. (nagara-2.0.0-mlc/vendor/semc/hardware/camera-kernel-module/) [1] https://developer.sony.com/file/download/open-source-archive-for-64-0-m-4-29/ Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221118131035.54874-1-konrad.dybcio@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/slg51000-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c index 75a941fb3c2b..1b2eee95ad3f 100644 --- a/drivers/regulator/slg51000-regulator.c +++ b/drivers/regulator/slg51000-regulator.c @@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) chip->cs_gpiod = cs_gpiod; } + usleep_range(10000, 11000); + i2c_set_clientdata(client, chip); chip->chip_irq = client->irq; chip->dev = dev; From 52d1aa8b8249ff477aaa38b6f74a8ced780d079c Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 9 Nov 2022 12:39:07 -0700 Subject: [PATCH 443/963] netfilter: conntrack: Fix data-races around ct mark nf_conn:mark can be read from and written to in parallel. Use READ_ONCE()/WRITE_ONCE() for reads and writes to prevent unwanted compiler optimizations. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniel Xu Signed-off-by: Pablo Neira Ayuso --- net/core/flow_dissector.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 4 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 24 ++++++++++++++---------- net/netfilter/nf_conntrack_standalone.c | 2 +- net/netfilter/nft_ct.c | 6 +++--- net/netfilter/xt_connmark.c | 18 ++++++++++-------- net/openvswitch/conntrack.c | 8 ++++---- net/sched/act_connmark.c | 4 ++-- net/sched/act_ct.c | 8 ++++---- net/sched/act_ctinfo.c | 6 +++--- 11 files changed, 45 insertions(+), 39 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25cd35f5922e..007730412947 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -296,7 +296,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb, key->ct_zone = ct->zone.id; #endif #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - key->ct_mark = ct->mark; + key->ct_mark = READ_ONCE(ct->mark); #endif cl = nf_ct_labels_find(ct); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f8e176c77d1c..b3cc416ed292 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -435,7 +435,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) switch (ctinfo) { case IP_CT_NEW: - ct->mark = hash; + WRITE_ONCE(ct->mark, hash); break; case IP_CT_RELATED: case IP_CT_RELATED_REPLY: @@ -452,7 +452,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) #ifdef DEBUG nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); + pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark)); if (!clusterip_responsible(cipinfo->config, hash)) { pr_debug("not responsible\n"); return NF_DROP; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f97bda06d2a9..2692139ce417 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1781,7 +1781,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } #ifdef CONFIG_NF_CONNTRACK_MARK - ct->mark = exp->master->mark; + ct->mark = READ_ONCE(exp->master->mark); #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK ct->secmark = exp->master->secmark; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7562b215b932..d71150a40fb0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,9 +328,9 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) +static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) { - if (nla_put_be32(skb, CTA_MARK, htonl(ct->mark))) + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -543,7 +543,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -722,6 +722,7 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; + u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -826,8 +827,9 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & (1 << IPCT_MARK) || ct->mark) - && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if ((events & (1 << IPCT_MARK) || mark) && + ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -1154,7 +1156,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data) } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((ct->mark & filter->mark.mask) != filter->mark.val) + if ((READ_ONCE(ct->mark) & filter->mark.mask) != filter->mark.val) goto ignore_entry; #endif status = (u32)READ_ONCE(ct->status); @@ -2002,9 +2004,9 @@ static void ctnetlink_change_mark(struct nf_conn *ct, mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK])); mark = ntohl(nla_get_be32(cda[CTA_MARK])); - newmark = (ct->mark & mask) ^ mark; - if (newmark != ct->mark) - ct->mark = newmark; + newmark = (READ_ONCE(ct->mark) & mask) ^ mark; + if (newmark != READ_ONCE(ct->mark)) + WRITE_ONCE(ct->mark, newmark); } #endif @@ -2669,6 +2671,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; + u32 mark; zone = nf_ct_zone(ct); @@ -2730,7 +2733,8 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + mark = READ_ONCE(ct->mark); + if (mark && ctnetlink_dump_mark(skb, mark) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4ffe84c5a82c..bca839ab1ae8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -366,7 +366,7 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) - seq_printf(s, "mark=%u ", ct->mark); + seq_printf(s, "mark=%u ", READ_ONCE(ct->mark)); #endif ct_show_secctx(s, ct); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a3f01f209a53..641dc21f92b4 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -98,7 +98,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - *dest = ct->mark; + *dest = READ_ONCE(ct->mark); return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK @@ -297,8 +297,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - if (ct->mark != value) { - ct->mark = value; + if (READ_ONCE(ct->mark) != value) { + WRITE_ONCE(ct->mark, value); nf_conntrack_event_cache(IPCT_MARK, ct); } break; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index e5ebc0810675..ad3c033db64e 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -30,6 +30,7 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) u_int32_t new_targetmark; struct nf_conn *ct; u_int32_t newmark; + u_int32_t oldmark; ct = nf_ct_get(skb, &ctinfo); if (ct == NULL) @@ -37,14 +38,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) switch (info->mode) { case XT_CONNMARK_SET: - newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; + oldmark = READ_ONCE(ct->mark); + newmark = (oldmark & ~info->ctmask) ^ info->ctmark; if (info->shift_dir == D_SHIFT_RIGHT) newmark >>= info->shift_bits; else newmark <<= info->shift_bits; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; @@ -55,15 +57,15 @@ connmark_tg_shift(struct sk_buff *skb, const struct xt_connmark_tginfo2 *info) else new_targetmark <<= info->shift_bits; - newmark = (ct->mark & ~info->ctmask) ^ + newmark = (READ_ONCE(ct->mark) & ~info->ctmask) ^ new_targetmark; - if (ct->mark != newmark) { - ct->mark = newmark; + if (READ_ONCE(ct->mark) != newmark) { + WRITE_ONCE(ct->mark, newmark); nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: - new_targetmark = (ct->mark & info->ctmask); + new_targetmark = (READ_ONCE(ct->mark) & info->ctmask); if (info->shift_dir == D_SHIFT_RIGHT) new_targetmark >>= info->shift_bits; else @@ -126,7 +128,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL) return false; - return ((ct->mark & info->mask) == info->mark) ^ info->invert; + return ((READ_ONCE(ct->mark) & info->mask) == info->mark) ^ info->invert; } static int connmark_mt_check(const struct xt_mtchk_param *par) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c7b10234cf7c..c8eaf4234b2e 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -152,7 +152,7 @@ static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo) static u32 ovs_ct_get_mark(const struct nf_conn *ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - return ct ? ct->mark : 0; + return ct ? READ_ONCE(ct->mark) : 0; #else return 0; #endif @@ -340,9 +340,9 @@ static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key, #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) u32 new_mark; - new_mark = ct_mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = ct_mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); key->ct.mark = new_mark; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 66b143bb04ac..d41002e4613f 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -61,7 +61,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_get(skb, &ctinfo); if (c) { - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; goto out; @@ -81,7 +81,7 @@ static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, c = nf_ct_tuplehash_to_ctrack(thash); /* using overlimits stats to count how many packets marked */ ca->tcf_qstats.overlimits++; - skb->mark = c->mark; + skb->mark = READ_ONCE(c->mark); nf_ct_put(c); out: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b38d91d6b249..4c7f7861ea96 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -178,7 +178,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, entry = tcf_ct_flow_table_flow_action_get_next(action); entry->id = FLOW_ACTION_CT_METADATA; #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) - entry->ct_metadata.mark = ct->mark; + entry->ct_metadata.mark = READ_ONCE(ct->mark); #endif ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED : IP_CT_ESTABLISHED_REPLY; @@ -936,9 +936,9 @@ static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) if (!mask) return; - new_mark = mark | (ct->mark & ~(mask)); - if (ct->mark != new_mark) { - ct->mark = new_mark; + new_mark = mark | (READ_ONCE(ct->mark) & ~(mask)); + if (READ_ONCE(ct->mark) != new_mark) { + WRITE_ONCE(ct->mark, new_mark); if (nf_ct_is_confirmed(ct)) nf_conntrack_event_cache(IPCT_MARK, ct); } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index d4102f0a9abd..eaa02f098d1c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -32,7 +32,7 @@ static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, { u8 dscp, newdscp; - newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) & + newdscp = (((READ_ONCE(ct->mark) & cp->dscpmask) >> cp->dscpmaskshift) << 2) & ~INET_ECN_MASK; switch (proto) { @@ -72,7 +72,7 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct sk_buff *skb) { ca->stats_cpmark_set++; - skb->mark = ct->mark & cp->cpmarkmask; + skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, @@ -130,7 +130,7 @@ static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a, } if (cp->mode & CTINFO_MODE_DSCP) - if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask)) + if (!cp->dscpstatemask || (READ_ONCE(ct->mark) & cp->dscpstatemask)) tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto); if (cp->mode & CTINFO_MODE_CPMARK) From 33c7aba0b4ffd6d7cdab862a034eb582a5120a38 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 14 Nov 2022 11:31:54 +0100 Subject: [PATCH 444/963] netfilter: nf_tables: do not set up extensions for end interval Elements with an end interval flag set on do not store extensions. The global set definition is currently setting on the timeout and stateful expression for end interval elements. This leads to skipping end interval elements from the set->ops->walk() path as the expired check bogusly reports true. Moreover, do not set up stateful expressions for elements with end interval flag set on since this is never used. Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition") Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e7152d599d73..7a09421f19e1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5958,7 +5958,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, &timeout); if (err) return err; - } else if (set->flags & NFT_SET_TIMEOUT) { + } else if (set->flags & NFT_SET_TIMEOUT && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { timeout = set->timeout; } @@ -6024,7 +6025,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EOPNOTSUPP; goto err_set_elem_expr; } - } else if (set->num_exprs > 0) { + } else if (set->num_exprs > 0 && + !(flags & NFT_SET_ELEM_INTERVAL_END)) { err = nft_set_elem_expr_clone(ctx, set, expr_array); if (err < 0) goto err_set_elem_expr_clone; From 4fe1ec995483737f3d2a14c3fe1d8fe634972979 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 1 Nov 2022 16:53:35 -0400 Subject: [PATCH 445/963] dm ioctl: fix misbehavior if list_versions races with module loading __list_versions will first estimate the required space using the "dm_target_iterate(list_version_get_needed, &needed)" call and then will fill the space using the "dm_target_iterate(list_version_get_info, &iter_info)" call. Each of these calls locks the targets using the "down_read(&_lock)" and "up_read(&_lock)" calls, however between the first and second "dm_target_iterate" there is no lock held and the target modules can be loaded at this point, so the second "dm_target_iterate" call may need more space than what was the first "dm_target_iterate" returned. The code tries to handle this overflow (see the beginning of list_version_get_info), however this handling is incorrect. The code sets "param->data_size = param->data_start + needed" and "iter_info.end = (char *)vers+len" - "needed" is the size returned by the first dm_target_iterate call; "len" is the size of the buffer allocated by userspace. "len" may be greater than "needed"; in this case, the code will write up to "len" bytes into the buffer, however param->data_size is set to "needed", so it may write data past the param->data_size value. The ioctl interface copies only up to param->data_size into userspace, thus part of the result will be truncated. Fix this bug by setting "iter_info.end = (char *)vers + needed;" - this guarantees that the second "dm_target_iterate" call will write only up to the "needed" buffer and it will exit with "DM_BUFFER_FULL_FLAG" if it overflows the "needed" space - in this case, userspace will allocate a larger buffer and retry. Note that there is also a bug in list_version_get_needed - we need to add "strlen(tt->name) + 1" to the needed size, not "strlen(tt->name)". Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 6b3f867d0b70..3bfc1583c20a 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -655,7 +655,7 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param) size_t *needed = needed_param; *needed += sizeof(struct dm_target_versions); - *needed += strlen(tt->name); + *needed += strlen(tt->name) + 1; *needed += ALIGN_MASK; } @@ -720,7 +720,7 @@ static int __list_versions(struct dm_ioctl *param, size_t param_size, const char iter_info.old_vers = NULL; iter_info.vers = vers; iter_info.flags = 0; - iter_info.end = (char *)vers+len; + iter_info.end = (char *)vers + needed; /* * Now loop through filling out the names & versions. From 0dfc1f4ceae86a0d09d880ab87625c86c61ed33c Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 11 Nov 2022 20:10:27 +0800 Subject: [PATCH 446/963] dm bufio: Fix missing decrement of no_sleep_enabled if dm_bufio_client_create failed The 'no_sleep_enabled' should be decreased in error handling path in dm_bufio_client_create() when the DM_BUFIO_CLIENT_NO_SLEEP flag is set, otherwise static_branch_unlikely() will always return true even if no dm_bufio_client instances have DM_BUFIO_CLIENT_NO_SLEEP flag set. Cc: stable@vger.kernel.org Fixes: 3c1c875d0586 ("dm bufio: conditionally enable branching for DM_BUFIO_CLIENT_NO_SLEEP") Signed-off-by: Zhihao Cheng Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c5ef818ca36..bb786c39545e 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1858,6 +1858,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign dm_io_client_destroy(c->dm_io); bad_dm_io: mutex_destroy(&c->lock); + if (c->no_sleep) + static_branch_dec(&no_sleep_enabled); kfree(c); bad_client: return ERR_PTR(r); From 5e5dab5ec763d600fe0a67837dd9155bdc42f961 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 15 Nov 2022 12:48:26 -0500 Subject: [PATCH 447/963] dm integrity: flush the journal on suspend This commit flushes the journal on suspend. It is prerequisite for the next commit that enables activating dm integrity devices in read-only mode. Note that we deliberately didn't flush the journal on suspend, so that the journal replay code would be tested. However, the dm-integrity code is 5 years old now, so that journal replay is well-tested, and we can make this change now. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index aaf2472df6e5..b4a873cff725 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2591,10 +2591,6 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; - /* the following test is not needed, but it tests the replay code */ - if (unlikely(dm_post_suspending(ic->ti)) && !ic->meta_dev) - return; - spin_lock_irq(&ic->endio_wait.lock); write_start = ic->committed_section; write_sections = ic->n_committed_sections; @@ -3101,8 +3097,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) drain_workqueue(ic->commit_wq); if (ic->mode == 'J') { - if (ic->meta_dev) - queue_work(ic->writer_wq, &ic->writer_work); + queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); } From 984bf2cc531e778e49298fdf6730e0396166aa21 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 15 Nov 2022 12:51:50 -0500 Subject: [PATCH 448/963] dm integrity: clear the journal on suspend There was a problem that a user burned a dm-integrity image on CDROM and could not activate it because it had a non-empty journal. Fix this problem by flushing the journal (done by the previous commit) and clearing the journal (done by this commit). Once the journal is cleared, dm-integrity won't attempt to replay it on the next activation. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b4a873cff725..648fdfecc42c 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -263,6 +263,7 @@ struct dm_integrity_c { struct completion crypto_backoff; + bool wrote_to_journal; bool journal_uptodate; bool just_formatted; bool recalculate_flag; @@ -2375,6 +2376,8 @@ static void integrity_commit(struct work_struct *w) if (!commit_sections) goto release_flush_bios; + ic->wrote_to_journal = true; + i = commit_start; for (n = 0; n < commit_sections; n++) { for (j = 0; j < ic->journal_section_entries; j++) { @@ -3100,6 +3103,14 @@ static void dm_integrity_postsuspend(struct dm_target *ti) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); dm_integrity_flush_buffers(ic, true); + if (ic->wrote_to_journal) { + init_journal(ic, ic->free_section, + ic->journal_sections - ic->free_section, ic->commit_seq); + if (ic->free_section) { + init_journal(ic, 0, ic->free_section, + next_commit_seq(ic->commit_seq)); + } + } } if (ic->mode == 'B') { @@ -3127,6 +3138,8 @@ static void dm_integrity_resume(struct dm_target *ti) DEBUG_print("resume\n"); + ic->wrote_to_journal = false; + if (ic->provided_data_sectors != old_provided_data_sectors) { if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && From 7fdbc5f014c3f71bc44673a2d6c5bb2d12d45f25 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 18 Nov 2022 15:41:41 +0000 Subject: [PATCH 449/963] io_uring: disallow self-propelled ring polling When we post a CQE we wake all ring pollers as it normally should be. However, if a CQE was generated by a multishot poll request targeting its own ring, it'll wake that request up, which will make it to post a new CQE, which will wake the request and so on until it exhausts all CQ entries. Don't allow multishot polling io_uring files but downgrade them to oneshots, which was always stated as a correct behaviour that the userspace should check for. Cc: stable@vger.kernel.org Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3124038c0e7474d427538c2d915335ec28c92d21.1668785722.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/poll.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/poll.c b/io_uring/poll.c index c34019b18211..055632e9092a 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -246,6 +246,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; + if (io_is_uring_fops(req->file)) + return IOU_POLL_DONE; /* multishot, just fill a CQE and proceed */ if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { From b09d6acba1d9a23963fedf96b4191502a4fec25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 18 Nov 2022 13:32:30 +0100 Subject: [PATCH 450/963] drm/amdgpu: handle gang submit before VMID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise it can happen that not all gang members can get a VMID assigned and we deadlock. Signed-off-by: Christian König Tested-by: Timur Kristóf Acked-by: Timur Kristóf Fixes: 68ce8b242242 ("drm/amdgpu: add gang submit backend v2") Reviewed-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20221118153023.312582-1-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cd968e781077..abb99cff8b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -254,6 +254,9 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, DRM_ERROR("Error adding fence (%d)\n", r); } + if (!fence && job->gang_submit) + fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + while (fence == NULL && vm && !job->vmid) { r = amdgpu_vmid_grab(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -264,9 +267,6 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, fence = amdgpu_sync_get_fence(&job->sync); } - if (!fence && job->gang_submit) - fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); - return fence; } From c19083c72ea72a1c12037bb3d708014632df80e4 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 2 Aug 2022 16:01:58 -0500 Subject: [PATCH 451/963] dma-buf: Use dma_fence_unwrap_for_each when importing fences MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since 68129f431faa ("dma-buf: warn about containers in dma_resv object"), dma_resv_add_shared_fence will warn if you attempt to add a container fence. While most drivers were fine, fences can also be added to a dma_resv via the recently added DMA_BUF_IOCTL_IMPORT_SYNC_FILE. Use dma_fence_unwrap_for_each to add each fence one at a time. Fixes: 594740497e99 ("dma-buf: Add an API for importing sync files (v10)") Signed-off-by: Jason Ekstrand Reported-by: Sarah Walker Reviewed-by: Christian König CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20220802210158.4162525-1-jason.ekstrand@collabora.com Signed-off-by: Christian König --- drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index dd0f83ee505b..e6f36c014c4c 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -391,8 +392,10 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, const void __user *user_data) { struct dma_buf_import_sync_file arg; - struct dma_fence *fence; + struct dma_fence *fence, *f; enum dma_resv_usage usage; + struct dma_fence_unwrap iter; + unsigned int num_fences; int ret = 0; if (copy_from_user(&arg, user_data, sizeof(arg))) @@ -411,13 +414,21 @@ static long dma_buf_import_sync_file(struct dma_buf *dmabuf, usage = (arg.flags & DMA_BUF_SYNC_WRITE) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ; - dma_resv_lock(dmabuf->resv, NULL); + num_fences = 0; + dma_fence_unwrap_for_each(f, &iter, fence) + ++num_fences; - ret = dma_resv_reserve_fences(dmabuf->resv, 1); - if (!ret) - dma_resv_add_fence(dmabuf->resv, fence, usage); + if (num_fences > 0) { + dma_resv_lock(dmabuf->resv, NULL); - dma_resv_unlock(dmabuf->resv); + ret = dma_resv_reserve_fences(dmabuf->resv, num_fences); + if (!ret) { + dma_fence_unwrap_for_each(f, &iter, fence) + dma_resv_add_fence(dmabuf->resv, f, usage); + } + + dma_resv_unlock(dmabuf->resv); + } dma_fence_put(fence); From 5b47348fc0b18a78c96f8474cc90b7525ad1bbfe Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 17 Nov 2022 15:56:01 +0800 Subject: [PATCH 452/963] arm64/mm: fix incorrect file_map_count for non-leaf pmd/pud The page table check trigger BUG_ON() unexpectedly when collapse hugepage: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:82! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 6 PID: 68 Comm: khugepaged Not tainted 6.1.0-rc3+ #750 Hardware name: linux,dummy-virt (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : page_table_check_clear.isra.0+0x258/0x3f0 lr : page_table_check_clear.isra.0+0x240/0x3f0 [...] Call trace: page_table_check_clear.isra.0+0x258/0x3f0 __page_table_check_pmd_clear+0xbc/0x108 pmdp_collapse_flush+0xb0/0x160 collapse_huge_page+0xa08/0x1080 hpage_collapse_scan_pmd+0xf30/0x1590 khugepaged_scan_mm_slot.constprop.0+0x52c/0xac8 khugepaged+0x338/0x518 kthread+0x278/0x2f8 ret_from_fork+0x10/0x20 [...] Since pmd_user_accessible_page() doesn't check if a pmd is leaf, it decrease file_map_count for a non-leaf pmd comes from collapse_huge_page(). and so trigger BUG_ON() unexpectedly. Fix this problem by using pmd_leaf() insteal of pmd_present() in pmd_user_accessible_page(). Moreover, use pud_leaf() for pud_user_accessible_page() too. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Reported-by: Denys Vlasenko Signed-off-by: Liu Shixin Reviewed-by: David Hildenbrand Acked-by: Pasha Tatashin Reviewed-by: Kefeng Wang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20221117075602.2904324-2-liushixin2@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 71a1af42f0e8..edf6625ce965 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -863,12 +863,12 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_present(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) { - return pud_present(pud) && pud_user(pud); + return pud_leaf(pud) && pud_user(pud); } #endif From c678669d6b13b77de3b99b97526aaf23c3088d0a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 8 Nov 2022 10:35:34 +0100 Subject: [PATCH 453/963] iavf: Fix a crash during reset task Recent commit aa626da947e9 ("iavf: Detach device during reset task") removed netif_tx_stop_all_queues() with an assumption that Tx queues are already stopped by netif_device_detach() in the beginning of reset task. This assumption is incorrect because during reset task a potential link event can start Tx queues again. Revert this change to fix this issue. Reproducer: 1. Run some Tx traffic (e.g. iperf3) over iavf interface 2. Switch MTU of this interface in a loop [root@host ~]# cat repro.sh IF=enp2s0f0v0 iperf3 -c 192.168.0.1 -t 600 --logfile /dev/null & sleep 2 while :; do for i in 1280 1500 2000 900 ; do ip link set $IF mtu $i sleep 2 done done [root@host ~]# ./repro.sh Result: [ 306.199917] iavf 0000:02:02.0 enp2s0f0v0: NIC Link is Up Speed is 40 Gbps Full Duplex [ 308.205944] iavf 0000:02:02.0 enp2s0f0v0: NIC Link is Up Speed is 40 Gbps Full Duplex [ 310.103223] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 310.110179] #PF: supervisor write access in kernel mode [ 310.115396] #PF: error_code(0x0002) - not-present page [ 310.120526] PGD 0 P4D 0 [ 310.123057] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 310.127408] CPU: 24 PID: 183 Comm: kworker/u64:9 Kdump: loaded Not tainted 6.1.0-rc3+ #2 [ 310.135485] Hardware name: Abacus electric, s.r.o. - servis@abacus.cz Super Server/H12SSW-iN, BIOS 2.4 04/13/2022 [ 310.145728] Workqueue: iavf iavf_reset_task [iavf] [ 310.150520] RIP: 0010:iavf_xmit_frame_ring+0xd1/0xf70 [iavf] [ 310.156180] Code: d0 0f 86 da 00 00 00 83 e8 01 0f b7 fa 29 f8 01 c8 39 c6 0f 8f a0 08 00 00 48 8b 45 20 48 8d 14 92 bf 01 00 00 00 4c 8d 3c d0 <49> 89 5f 08 8b 43 70 66 41 89 7f 14 41 89 47 10 f6 83 82 00 00 00 [ 310.174918] RSP: 0018:ffffbb5f0082caa0 EFLAGS: 00010293 [ 310.180137] RAX: 0000000000000000 RBX: ffff92345471a6e8 RCX: 0000000000000200 [ 310.187259] RDX: 0000000000000000 RSI: 000000000000000d RDI: 0000000000000001 [ 310.194385] RBP: ffff92341d249000 R08: ffff92434987fcac R09: 0000000000000001 [ 310.201509] R10: 0000000011f683b9 R11: 0000000011f50641 R12: 0000000000000008 [ 310.208631] R13: ffff923447500000 R14: 0000000000000000 R15: 0000000000000000 [ 310.215756] FS: 0000000000000000(0000) GS:ffff92434ee00000(0000) knlGS:0000000000000000 [ 310.223835] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 310.229572] CR2: 0000000000000008 CR3: 0000000fbc210004 CR4: 0000000000770ee0 [ 310.236696] PKRU: 55555554 [ 310.239399] Call Trace: [ 310.241844] [ 310.243855] ? dst_alloc+0x5b/0xb0 [ 310.247260] dev_hard_start_xmit+0x9e/0x1f0 [ 310.251439] sch_direct_xmit+0xa0/0x370 [ 310.255276] __qdisc_run+0x13e/0x580 [ 310.258848] __dev_queue_xmit+0x431/0xd00 [ 310.262851] ? selinux_ip_postroute+0x147/0x3f0 [ 310.267377] ip_finish_output2+0x26c/0x540 Fixes: aa626da947e9 ("iavf: Detach device during reset task") Cc: Jacob Keller Cc: Patryk Piotrowski Cc: SlawomirX Laba Signed-off-by: Ivan Vecera Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 3fc572341781..5abcd66e7c7a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3033,6 +3033,7 @@ static void iavf_reset_task(struct work_struct *work) if (running) { netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); adapter->link_up = false; iavf_napi_disable_all(adapter); } From 08f1c147b7265245d67321585c68a27e990e0c4b Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 8 Nov 2022 11:25:02 +0100 Subject: [PATCH 454/963] iavf: Do not restart Tx queues after reset task failure After commit aa626da947e9 ("iavf: Detach device during reset task") the device is detached during reset task and re-attached at its end. The problem occurs when reset task fails because Tx queues are restarted during device re-attach and this leads later to a crash. To resolve this issue properly close the net device in cause of failure in reset task to avoid restarting of tx queues at the end. Also replace the hacky manipulation with IFF_UP flag by device close that clears properly both IFF_UP and __LINK_STATE_START flags. In these case iavf_close() does not do anything because the adapter state is already __IAVF_DOWN. Reproducer: 1) Run some Tx traffic (e.g. iperf3) over iavf interface 2) Set VF trusted / untrusted in loop [root@host ~]# cat repro.sh PF=enp65s0f0 IF=${PF}v0 ip link set up $IF ip addr add 192.168.0.2/24 dev $IF sleep 1 iperf3 -c 192.168.0.1 -t 600 --logfile /dev/null & sleep 2 while :; do ip link set $PF vf 0 trust on ip link set $PF vf 0 trust off done [root@host ~]# ./repro.sh Result: [ 2006.650969] iavf 0000:41:01.0: Failed to init adminq: -53 [ 2006.675662] ice 0000:41:00.0: VF 0 is now trusted [ 2006.689997] iavf 0000:41:01.0: Reset task did not complete, VF disabled [ 2006.696611] iavf 0000:41:01.0: failed to allocate resources during reinit [ 2006.703209] ice 0000:41:00.0: VF 0 is now untrusted [ 2006.737011] ice 0000:41:00.0: VF 0 is now trusted [ 2006.764536] ice 0000:41:00.0: VF 0 is now untrusted [ 2006.768919] BUG: kernel NULL pointer dereference, address: 0000000000000b4a [ 2006.776358] #PF: supervisor read access in kernel mode [ 2006.781488] #PF: error_code(0x0000) - not-present page [ 2006.786620] PGD 0 P4D 0 [ 2006.789152] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 2006.792903] ice 0000:41:00.0: VF 0 is now trusted [ 2006.793501] CPU: 4 PID: 0 Comm: swapper/4 Kdump: loaded Not tainted 6.1.0-rc3+ #2 [ 2006.805668] Hardware name: Abacus electric, s.r.o. - servis@abacus.cz Super Server/H12SSW-iN, BIOS 2.4 04/13/2022 [ 2006.815915] RIP: 0010:iavf_xmit_frame_ring+0x96/0xf70 [iavf] [ 2006.821028] ice 0000:41:00.0: VF 0 is now untrusted [ 2006.821572] Code: 48 83 c1 04 48 c1 e1 04 48 01 f9 48 83 c0 10 6b 50 f8 55 c1 ea 14 45 8d 64 14 01 48 39 c8 75 eb 41 83 fc 07 0f 8f e9 08 00 00 <0f> b7 45 4a 0f b7 55 48 41 8d 74 24 05 31 c9 66 39 d0 0f 86 da 00 [ 2006.845181] RSP: 0018:ffffb253004bc9e8 EFLAGS: 00010293 [ 2006.850397] RAX: ffff9d154de45b00 RBX: ffff9d15497d52e8 RCX: ffff9d154de45b00 [ 2006.856327] ice 0000:41:00.0: VF 0 is now trusted [ 2006.857523] RDX: 0000000000000000 RSI: 00000000000005a8 RDI: ffff9d154de45ac0 [ 2006.857525] RBP: 0000000000000b00 R08: ffff9d159cb010ac R09: 0000000000000001 [ 2006.857526] R10: ffff9d154de45940 R11: 0000000000000000 R12: 0000000000000002 [ 2006.883600] R13: ffff9d1770838dc0 R14: 0000000000000000 R15: ffffffffc07b8380 [ 2006.885840] ice 0000:41:00.0: VF 0 is now untrusted [ 2006.890725] FS: 0000000000000000(0000) GS:ffff9d248e900000(0000) knlGS:0000000000000000 [ 2006.890727] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2006.909419] CR2: 0000000000000b4a CR3: 0000000c39c10002 CR4: 0000000000770ee0 [ 2006.916543] PKRU: 55555554 [ 2006.918254] ice 0000:41:00.0: VF 0 is now trusted [ 2006.919248] Call Trace: [ 2006.919250] [ 2006.919252] dev_hard_start_xmit+0x9e/0x1f0 [ 2006.932587] sch_direct_xmit+0xa0/0x370 [ 2006.936424] __dev_queue_xmit+0x7af/0xd00 [ 2006.940429] ip_finish_output2+0x26c/0x540 [ 2006.944519] ip_output+0x71/0x110 [ 2006.947831] ? __ip_finish_output+0x2b0/0x2b0 [ 2006.952180] __ip_queue_xmit+0x16d/0x400 [ 2006.952721] ice 0000:41:00.0: VF 0 is now untrusted [ 2006.956098] __tcp_transmit_skb+0xa96/0xbf0 [ 2006.965148] __tcp_retransmit_skb+0x174/0x860 [ 2006.969499] ? cubictcp_cwnd_event+0x40/0x40 [ 2006.973769] tcp_retransmit_skb+0x14/0xb0 ... Fixes: aa626da947e9 ("iavf: Detach device during reset task") Cc: Jacob Keller Cc: Patryk Piotrowski Cc: SlawomirX Laba Signed-off-by: Ivan Vecera Reviewed-by: Jacob Keller Reviewed-by: Leon Romanovsky Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 5abcd66e7c7a..b66f8fa1d83b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -2921,7 +2921,6 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); - adapter->netdev->flags &= ~IFF_UP; adapter->flags &= ~IAVF_FLAG_RESET_PENDING; iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); @@ -3021,6 +3020,11 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (netif_running(netdev)) { + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3173,6 +3177,16 @@ static void iavf_reset_task(struct work_struct *work) mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + + if (netif_running(netdev)) { + /* Close device to ensure that Tx queues will not be started + * during netif_device_attach() at the end of the reset task. + */ + rtnl_lock(); + dev_close(netdev); + rtnl_unlock(); + } + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); reset_finish: rtnl_lock(); From bb861c14f1b8cb9cbf03a132db7f22ec4e692b91 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Thu, 10 Nov 2022 15:14:44 +0100 Subject: [PATCH 455/963] iavf: remove INITIAL_MAC_SET to allow gARP to work properly IAVF_FLAG_INITIAL_MAC_SET prevents waiting on iavf_is_mac_set_handled() the first time the MAC is set. This breaks gratuitous ARP because the MAC address has not been updated yet when the gARP packet is sent out. Current behaviour: $ echo 1 > /sys/class/net/ens4f0/device/sriov_numvfs iavf 0000:88:02.0: MAC address: ee:04:19:14:ec:ea $ ip addr add 192.168.1.1/24 dev ens4f0v0 $ ip link set dev ens4f0v0 up $ echo 1 > /proc/sys/net/ipv4/conf/ens4f0v0/arp_notify $ ip link set ens4f0v0 addr 00:11:22:33:44:55 07:23:41.676611 ee:04:19:14:ec:ea > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 192.168.1.1 tell 192.168.1.1, length 28 With IAVF_FLAG_INITIAL_MAC_SET removed: $ echo 1 > /sys/class/net/ens4f0/device/sriov_numvfs iavf 0000:88:02.0: MAC address: 3e:8a:16:a2:37:6d $ ip addr add 192.168.1.1/24 dev ens4f0v0 $ ip link set dev ens4f0v0 up $ echo 1 > /proc/sys/net/ipv4/conf/ens4f0v0/arp_notify $ ip link set ens4f0v0 addr 00:11:22:33:44:55 07:28:01.836608 00:11:22:33:44:55 > ff:ff:ff:ff:ff:ff, ethertype ARP (0x0806), length 42: Request who-has 192.168.1.1 tell 192.168.1.1, length 28 Fixes: 35a2443d0910 ("iavf: Add waiting for response from PF in set mac") Signed-off-by: Stefan Assmann Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 1 - drivers/net/ethernet/intel/iavf/iavf_main.c | 8 -------- 2 files changed, 9 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 3f6187c16424..0d1bab4ac1b0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -298,7 +298,6 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) -#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index b66f8fa1d83b..801f5b7b8119 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1087,12 +1087,6 @@ static int iavf_set_mac(struct net_device *netdev, void *p) if (ret) return ret; - /* If this is an initial set MAC during VF spawn do not wait */ - if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { - adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; - return 0; - } - ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, iavf_is_mac_set_handled(netdev, addr->sa_data), msecs_to_jiffies(2500)); @@ -2605,8 +2599,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; - adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); From a8417330f8a57275ed934293e832982b6d882713 Mon Sep 17 00:00:00 2001 From: Slawomir Laba Date: Thu, 3 Nov 2022 14:00:03 +0100 Subject: [PATCH 456/963] iavf: Fix race condition between iavf_shutdown and iavf_remove Fix a deadlock introduced by commit 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") due to race condition between iavf_shutdown and iavf_remove, where iavf_remove stucks forever in while loop since iavf_shutdown already set __IAVF_REMOVE adapter state. Fix this by checking if the __IAVF_IN_REMOVE_TASK has already been set and return if so. Fixes: 974578017fc1 ("iavf: Add waiting so the port is initialized in remove") Signed-off-by: Slawomir Laba Signed-off-by: Mateusz Palczewski Tested-by: Marek Szlosek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 801f5b7b8119..d7465296f650 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5042,23 +5042,21 @@ static int __maybe_unused iavf_resume(struct device *dev_d) static void iavf_remove(struct pci_dev *pdev) { struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); - struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; + struct iavf_cloud_filter *cf, *cftmp; struct iavf_adv_rss *rss, *rsstmp; struct iavf_mac_filter *f, *ftmp; - struct iavf_cloud_filter *cf, *cftmp; - struct iavf_hw *hw = &adapter->hw; + struct net_device *netdev; + struct iavf_hw *hw; int err; - /* When reboot/shutdown is in progress no need to do anything - * as the adapter is already REMOVE state that was set during - * iavf_shutdown() callback. - */ - if (adapter->state == __IAVF_REMOVE) + netdev = adapter->netdev; + hw = &adapter->hw; + + if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) return; - set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section); /* Wait until port initialization is complete. * There are flows where register/unregister netdev may race. */ From 81cd7e8489278d28794e7b272950c3e00c344e44 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Fri, 18 Nov 2022 15:40:03 -0800 Subject: [PATCH 457/963] Input: i8042 - fix leaking of platform device on module removal Avoid resetting the module-wide i8042_platform_device pointer in i8042_probe() or i8042_remove(), so that the device can be properly destroyed by i8042_exit() on module unload. Fixes: 9222ba68c3f4 ("Input: i8042 - add deferred probe support") Signed-off-by: Chen Jun Link: https://lore.kernel.org/r/20221109034148.23821-1-chenjun102@huawei.com Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index f9486495baef..6dac7c1853a5 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1543,8 +1543,6 @@ static int i8042_probe(struct platform_device *dev) { int error; - i8042_platform_device = dev; - if (i8042_reset == I8042_RESET_ALWAYS) { error = i8042_controller_selftest(); if (error) @@ -1582,7 +1580,6 @@ static int i8042_probe(struct platform_device *dev) i8042_free_aux_ports(); /* in case KBD failed but AUX not */ i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return error; } @@ -1592,7 +1589,6 @@ static int i8042_remove(struct platform_device *dev) i8042_unregister_ports(); i8042_free_irqs(); i8042_controller_reset(false); - i8042_platform_device = NULL; return 0; } From f31e3c204d1844b8680a442a48868af5ac3d5481 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 17 Nov 2022 06:20:11 +0000 Subject: [PATCH 458/963] ARM: mxs: fix memory leak in mxs_machine_init() If of_property_read_string() failed, 'soc_dev_attr' should be freed before return. Otherwise there is a memory leak. Fixes: 2046338dcbc6 ("ARM: mxs: Use soc bus infrastructure") Signed-off-by: Zheng Yongjun Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm/mach-mxs/mach-mxs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-mxs/mach-mxs.c b/arch/arm/mach-mxs/mach-mxs.c index 25c9d184fa4c..1c57ac401649 100644 --- a/arch/arm/mach-mxs/mach-mxs.c +++ b/arch/arm/mach-mxs/mach-mxs.c @@ -393,8 +393,10 @@ static void __init mxs_machine_init(void) root = of_find_node_by_path("/"); ret = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (ret) + if (ret) { + kfree(soc_dev_attr); return; + } soc_dev_attr->family = "Freescale MXS Family"; soc_dev_attr->soc_id = mxs_get_soc_id(); From af8a6329f2417721fc4588a91427b5928946f12d Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 17 Nov 2022 17:53:53 +0800 Subject: [PATCH 459/963] arm64: dts: imx8mp-evk: correct pcie pad settings According to RM bit layout, BIT3 and BIT0 are reserved. 8 7 6 5 4 3 2 1 0 PE HYS PUE ODE FSEL X DSE X Although function is not broken, we should not set reserved bit. Fixes: d50650500064 ("arm64: dts: imx8mp-evk: Add PCIe support") Signed-off-by: Peng Fan Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp-evk.dts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index 9f1469db554d..b4c1ef2559f2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -544,14 +544,14 @@ MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c2 pinctrl_pcie0: pcie0grp { fsl,pins = < - MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x61 /* open drain, pull up */ - MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x41 + MX8MP_IOMUXC_I2C4_SCL__PCIE_CLKREQ_B 0x60 /* open drain, pull up */ + MX8MP_IOMUXC_SD1_DATA5__GPIO2_IO07 0x40 >; }; pinctrl_pcie0_reg: pcie0reggrp { fsl,pins = < - MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x41 + MX8MP_IOMUXC_SD1_DATA4__GPIO2_IO06 0x40 >; }; From e68be7b39f21d8a9291a5a3019787cd3ca999dd7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 18 Nov 2022 10:41:02 -0300 Subject: [PATCH 460/963] ARM: dts: imx6q-prti6q: Fix ref/tcxo-clock-frequency properties make dtbs_check gives the following errors: ref-clock-frequency: size (9) error for type uint32 tcxo-clock-frequency: size (9) error for type uint32 Fix it by passing the frequencies inside < > as documented in Documentation/devicetree/bindings/net/wireless/ti,wlcore.yaml. Signed-off-by: Fabio Estevam Fixes: 0d446a505592 ("ARM: dts: add Protonic PRTI6Q board") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-prti6q.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index b4605edfd2ab..d8fa83effd63 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -364,8 +364,8 @@ wifi { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_wifi>; interrupts-extended = <&gpio1 30 IRQ_TYPE_LEVEL_HIGH>; - ref-clock-frequency = "38400000"; - tcxo-clock-frequency = "19200000"; + ref-clock-frequency = <38400000>; + tcxo-clock-frequency = <19200000>; }; }; From f70074140524c59a0935947b06dd6cb6e1ea642d Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 17 Nov 2022 19:13:56 +0800 Subject: [PATCH 461/963] net: ethernet: mtk_eth_soc: fix error handling in mtk_open() If mtk_start_dma() fails, invoke phylink_disconnect_phy() to perform cleanup. phylink_disconnect_phy() contains the put_device action. If phylink_disconnect_phy is not performed, the Kref of netdev will leak. Fixes: b8fc9f30821e ("net: ethernet: mediatek: Add basic PHYLINK support") Signed-off-by: Liu Jian Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221117111356.161547-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 7cd381530aa4..1d1f2342e3ec 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2996,8 +2996,10 @@ static int mtk_open(struct net_device *dev) int i; err = mtk_start_dma(eth); - if (err) + if (err) { + phylink_disconnect_phy(mac->phylink); return err; + } for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) mtk_ppe_start(eth->ppe[i]); From 594c61ffc77de0a197934aa0f1df9285c68801c6 Mon Sep 17 00:00:00 2001 From: Peter Kosyh Date: Thu, 17 Nov 2022 18:28:06 +0300 Subject: [PATCH 462/963] net/mlx4: Check retval of mlx4_bitmap_init If mlx4_bitmap_init fails, mlx4_bitmap_alloc_range will dereference the NULL pointer (bitmap->table). Make sure, that mlx4_bitmap_alloc_range called in no error case. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d57febe1a478 ("net/mlx4: Add A0 hybrid steering") Reviewed-by: Tariq Toukan Signed-off-by: Peter Kosyh Link: https://lore.kernel.org/r/20221117152806.278072-1-pkosyh@yandex.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index b149e601f673..48cfaa7eaf50 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -697,7 +697,8 @@ static int mlx4_create_zones(struct mlx4_dev *dev, err = mlx4_bitmap_init(*bitmap + k, 1, MLX4_QP_TABLE_RAW_ETH_SIZE - 1, 0, 0); - mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); + if (!err) + mlx4_bitmap_alloc_range(*bitmap + k, 1, 1, 0); } if (err) From cbe867685386af1f0a2648f5279f6e4c74bfd17f Mon Sep 17 00:00:00 2001 From: Hui Tang Date: Thu, 17 Nov 2022 16:40:32 +0800 Subject: [PATCH 463/963] net: mvpp2: fix possible invalid pointer dereference It will cause invalid pointer dereference to priv->cm3_base behind, if PTR_ERR(priv->cm3_base) in mvpp2_get_sram(). Fixes: e54ad1e01c00 ("net: mvpp2: add CM3 SRAM memory map") Signed-off-by: Hui Tang Link: https://lore.kernel.org/r/20221117084032.101144-1-tanghui20@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index eb0fb8128096..b399bdb1ca36 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7350,6 +7350,7 @@ static int mvpp2_get_sram(struct platform_device *pdev, struct mvpp2 *priv) { struct resource *res; + void __iomem *base; res = platform_get_resource(pdev, IORESOURCE_MEM, 2); if (!res) { @@ -7360,9 +7361,12 @@ static int mvpp2_get_sram(struct platform_device *pdev, return 0; } - priv->cm3_base = devm_ioremap_resource(&pdev->dev, res); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); - return PTR_ERR_OR_ZERO(priv->cm3_base); + priv->cm3_base = base; + return 0; } static int mvpp2_probe(struct platform_device *pdev) From 62a7311fb96c61d281da9852dbee4712fc8c3277 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 17 Nov 2022 16:50:38 +0800 Subject: [PATCH 464/963] net/qla3xxx: fix potential memleak in ql3xxx_send() The ql3xxx_send() returns NETDEV_TX_OK without freeing skb in error handling case, add dev_kfree_skb_any() to fix it. Fixes: bd36b0ac5d06 ("qla3xxx: Add support for Qlogic 4032 chip.") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1668675039-21138-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 76072f8c3d2f..0d57ffcedf0c 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2471,6 +2471,7 @@ static netdev_tx_t ql3xxx_send(struct sk_buff *skb, skb_shinfo(skb)->nr_frags); if (tx_cb->seg_count == -1) { netdev_err(ndev, "%s: invalid segment count!\n", __func__); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } From d66608803aa2ffb9e475623343f69996305771ae Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Nov 2022 20:46:58 +0800 Subject: [PATCH 465/963] octeontx2-af: debugsfs: fix pci device refcount leak As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). So before returning from rvu_dbg_rvu_pf_cgx_map_display() or cgx_print_dmac_flt(), pci_dev_put() is called to avoid refcount leak. Fixes: dbc52debf95f ("octeontx2-af: Debugfs support for DMAC filters") Fixes: e2fb37303865 ("octeontx2-af: Display CGX, NIX and PF map in debugfs.") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221117124658.162409-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a1970ebedf95..f66dde2b0f92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -880,6 +880,8 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused) sprintf(lmac, "LMAC%d", lmac_id); seq_printf(filp, "%s\t0x%x\t\tNIX%d\t\t%s\t%s\n", dev_name(&pdev->dev), pcifunc, blkid, cgx, lmac); + + pci_dev_put(pdev); } return 0; } @@ -2566,6 +2568,7 @@ static int cgx_print_dmac_flt(struct seq_file *s, int lmac_id) } } + pci_dev_put(pdev); return 0; } From 5619537284f1017e9f6c7500b02b859b3830a06d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 17 Nov 2022 21:51:48 +0800 Subject: [PATCH 466/963] net: pch_gbe: fix pci device refcount leak while module exiting As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). In pch_gbe_probe(), pci_get_domain_bus_and_slot() is called, so in error path in probe() and remove() function, pci_dev_put() should be called to avoid refcount leak. Compile tested only. Fixes: 1a0bdadb4e36 ("net/pch_gbe: supports eg20t ptp clock") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221117135148.301014-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index c9ae47128a07..28b7cec485ef 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2460,6 +2460,7 @@ static void pch_gbe_remove(struct pci_dev *pdev) unregister_netdev(netdev); pch_gbe_phy_hw_reset(&adapter->hw); + pci_dev_put(adapter->ptp_pdev); free_netdev(netdev); } @@ -2534,7 +2535,7 @@ static int pch_gbe_probe(struct pci_dev *pdev, /* setup the private structure */ ret = pch_gbe_sw_init(adapter); if (ret) - goto err_free_netdev; + goto err_put_dev; /* Initialize PHY */ ret = pch_gbe_init_phy(adapter); @@ -2592,6 +2593,8 @@ static int pch_gbe_probe(struct pci_dev *pdev, err_free_adapter: pch_gbe_phy_hw_reset(&adapter->hw); +err_put_dev: + pci_dev_put(adapter->ptp_pdev); err_free_netdev: free_netdev(netdev); return ret; From 4abd9600b9d15d3d92a9ac25cf200422a4c415ee Mon Sep 17 00:00:00 2001 From: Diana Wang Date: Thu, 17 Nov 2022 16:37:43 +0100 Subject: [PATCH 467/963] nfp: fill splittable of devlink_port_attrs correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error is reflected in that it shows wrong splittable status of port when executing "devlink port show". The reason which leads the error is that the assigned operation of splittable is just a simple negation operation of split and it does not consider port lanes quantity. A splittable port should have several lanes that can be split(lanes quantity > 1). If without the judgement, it will show wrong message for some firmware, such as 2x25G, 2x10G. Fixes: a0f49b548652 ("devlink: Add a new devlink port split ability attribute and pass to netlink") Signed-off-by: Diana Wang Reviewed-by: Louis Peens Reviewed-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 405786c00334..cb08d7bf9524 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -341,7 +341,7 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) return ret; attrs.split = eth_port.is_split; - attrs.splittable = !attrs.split; + attrs.splittable = eth_port.port_lanes > 1 && !attrs.split; attrs.lanes = eth_port.port_lanes; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; attrs.phys.port_number = eth_port.label_port; From 0873016d46f6dfafd1bdf4d9b935b3331b226f7c Mon Sep 17 00:00:00 2001 From: Jaco Coetzee Date: Thu, 17 Nov 2022 16:37:44 +0100 Subject: [PATCH 468/963] nfp: add port from netdev validation for EEPROM access Setting of the port flag `NFP_PORT_CHANGED`, introduced to ensure the correct reading of EEPROM data, causes a fatal kernel NULL pointer dereference in cases where the target netdev type cannot be determined. Add validation of port struct pointer before attempting to set the `NFP_PORT_CHANGED` flag. Return that operation is not supported if the netdev type cannot be determined. Fixes: 4ae97cae07e1 ("nfp: ethtool: fix the display error of `ethtool -m DEVNAME`") Signed-off-by: Jaco Coetzee Reviewed-by: Louis Peens Signed-off-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1775997f9c69..991059d6cb32 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1432,6 +1432,9 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + if (!port) + return -EOPNOTSUPP; + /* update port state to get latest interface */ set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); From 4d633d1b468b6eb107a81b2fd10b9debddca3d47 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 18 Nov 2022 11:43:53 +0800 Subject: [PATCH 469/963] bonding: fix ICMPv6 header handling when receiving IPv6 messages Currently, we get icmp6hdr via function icmp6_hdr(), which needs the skb transport header to be set first. But there is no rule to ask driver set transport header before netif_receive_skb() and bond_handle_frame(). So we will not able to get correct icmp6hdr on some drivers. Fix this by using skb_header_pointer to get the IPv6 and ICMPV6 headers. Reported-by: Liang Li Fixes: 4e24be018eb9 ("bonding: add new parameter ns_targets") Suggested-by: Eric Dumazet Signed-off-by: Hangbin Liu Reviewed-by: Eric Dumazet Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20221118034353.1736727-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e84c49bf4d0c..f298b9b3eb77 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3231,16 +3231,23 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct slave *curr_active_slave, *curr_arp_slave; - struct icmp6hdr *hdr = icmp6_hdr(skb); struct in6_addr *saddr, *daddr; + struct { + struct ipv6hdr ip6; + struct icmp6hdr icmp6; + } *combined, _combined; if (skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - hdr->icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + skb->pkt_type == PACKET_LOOPBACK) goto out; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; + combined = skb_header_pointer(skb, 0, sizeof(_combined), &_combined); + if (!combined || combined->ip6.nexthdr != NEXTHDR_ICMP || + combined->icmp6.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT) + goto out; + + saddr = &combined->ip6.saddr; + daddr = &combined->ip6.saddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), From 7cef6b73fba96abef731a53501924fc3c4a0f947 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Nov 2022 09:12:49 +0800 Subject: [PATCH 470/963] macsec: Fix invalid error code set 'ret' is defined twice in macsec_changelink(), when it is set in macsec_is_offloaded case, it will be invalid before return. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: YueHaibing Reviewed-by: Saeed Mahameed Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20221118011249.48112-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 85376d2f24ca..f41f67b583db 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3835,7 +3835,6 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; - int ret; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { From 406c706c7b7f1730aa787e914817b8d16b1e99f6 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Thu, 3 Nov 2022 17:02:10 +0000 Subject: [PATCH 471/963] vfs: vfs_tmpfile: ensure O_EXCL flag is enforced If O_EXCL is *not* specified, then linkat() can be used to link the temporary file into the filesystem. If O_EXCL is specified then linkat() should fail (-1). After commit 863f144f12ad ("vfs: open inside ->tmpfile()") the O_EXCL flag is no longer honored by the vfs layer for tmpfile, which means the file can be linked even if O_EXCL flag is specified, which is a change in behaviour for userspace! The open flags was previously passed as a parameter, so it was uneffected by the changes to file->f_flags caused by finish_open(). This patch fixes the issue by storing file->f_flags in a local variable so the O_EXCL test logic is restored. This regression was detected by Android CTS Bionic fcntl() tests running on android-mainline [1]. [1] https://android.googlesource.com/platform/bionic/+/ refs/heads/master/tests/fcntl_test.cpp#352 Fixes: 863f144f12ad ("vfs: open inside ->tmpfile()") Acked-by: Miklos Szeredi Tested-by: Will McVicker Signed-off-by: Peter Griffin Signed-off-by: Al Viro --- fs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 578c2110df02..9155ecb547ce 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3591,6 +3591,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; + int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC); @@ -3613,7 +3614,7 @@ static int vfs_tmpfile(struct user_namespace *mnt_userns, if (error) return error; inode = file_inode(file); - if (!(file->f_flags & O_EXCL)) { + if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); From 14f16d47561ba9249efc6c2db9d47ed56841f070 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:36 -0700 Subject: [PATCH 472/963] ACPI: HMAT: remove unnecessary variable initialization In hmat_register_target_initiators(), the variable 'best' gets initialized in the outer per-locality-type for loop. The initialization just before setting up 'Access 1' targets was unnecessary. Remove it. Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Acked-by: Kirill A. Shutemov Acked-by: Rafael J. Wysocki Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-1-3712569be691@intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..144a84f429ed 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -644,7 +644,6 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) From 48d4180939e12c4bd2846f984436d895bb9699ed Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:37 -0700 Subject: [PATCH 473/963] ACPI: HMAT: Fix initiator registration for single-initiator systems In a system with a single initiator node, and one or more memory-only 'target' nodes, the memory-only node(s) would fail to register their initiator node correctly. i.e. in sysfs: # ls /sys/devices/system/node/node0/access0/targets/ node0 Where as the correct behavior should be: # ls /sys/devices/system/node/node0/access0/targets/ node0 node1 This happened because hmat_register_target_initiators() uses list_sort() to sort the initiator list, but the sort comparision function (initiator_cmp()) is overloaded to also set the node mask's bits. In a system with a single initiator, the list is singular, and list_sort elides the comparision helper call. Thus the node mask never gets set, and the subsequent search for the best initiator comes up empty. Add a new helper to consume the sorted initiator list, and generate the nodemask, decoupling it from the overloaded initiator_cmp() comparision callback. This prevents the singular list corner case naturally, and makes the code easier to follow as well. Cc: Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Cc: Kirill A. Shutemov Reported-by: Chris Piper Signed-off-by: Vishal Verma Acked-by: Rafael J. Wysocki Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-2-3712569be691@intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 144a84f429ed..6cceca64a6bc 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators. */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -643,7 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) From 05530ef7cf7c7d700f6753f058999b1b5099a026 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Nov 2022 15:23:50 -0800 Subject: [PATCH 474/963] ALSA: seq: Fix function prototype mismatch in snd_seq_expand_var_event With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), indirect call targets are validated against the expected function pointer prototype to make sure the call target is valid to help mitigate ROP attacks. If they are not identical, there is a failure at run time, which manifests as either a kernel panic or thread getting killed. seq_copy_in_user() and seq_copy_in_kernel() did not have prototypes matching snd_seq_dump_func_t. Adjust this and remove the casts. There are not resulting binary output differences. This was found as a result of Clang's new -Wcast-function-type-strict flag, which is more sensitive than the simpler -Wcast-function-type, which only checks for type width mismatches. Reported-by: kernel test robot Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: "Gustavo A. R. Silva" Cc: alsa-devel@alsa-project.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221118232346.never.380-kees@kernel.org Signed-off-by: Takashi Iwai --- sound/core/seq/seq_memory.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index b7aee23fc387..47ef6bc30c0e 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -113,15 +113,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); * expand the variable length event to linear buffer space. */ -static int seq_copy_in_kernel(char **bufptr, const void *src, int size) +static int seq_copy_in_kernel(void *ptr, void *src, int size) { + char **bufptr = ptr; + memcpy(*bufptr, src, size); *bufptr += size; return 0; } -static int seq_copy_in_user(char __user **bufptr, const void *src, int size) +static int seq_copy_in_user(void *ptr, void *src, int size) { + char __user **bufptr = ptr; + if (copy_to_user(*bufptr, src, size)) return -EFAULT; *bufptr += size; @@ -151,8 +155,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char return newlen; } err = snd_seq_dump_var_event(event, - in_kernel ? (snd_seq_dump_func_t)seq_copy_in_kernel : - (snd_seq_dump_func_t)seq_copy_in_user, + in_kernel ? seq_copy_in_kernel : seq_copy_in_user, &buf); return err < 0 ? err : newlen; } From 242b0aaeabbe2efbef1b9d42a8e56627e800964c Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 16 Nov 2022 13:15:43 +0800 Subject: [PATCH 475/963] iommu/vt-d: Preset Access bit for IOVA in FL non-leaf paging entries The A/D bits are preseted for IOVA over first level(FL) usage for both kernel DMA (i.e, domain typs is IOMMU_DOMAIN_DMA) and user space DMA usage (i.e., domain type is IOMMU_DOMAIN_UNMANAGED). Presetting A bit in FL requires to preset the bit in every related paging entries, including the non-leaf ones. Otherwise, hardware may treat this as an error. For example, in a case of ECAP_REG.SMPWC==0, DMA faults might occur with below DMAR fault messages (wrapped for line length) dumped. DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Read NO_PASID] Request device [aa:00.0] fault addr 0x10c3a6000 [fault reason 0x90] SM: A/D bit update needed in first-level entry when set up in no snoop Fixes: 289b3b005cb9 ("iommu/vt-d: Preset A/D bits for user space DMA usage") Cc: stable@vger.kernel.org Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20221113010324.1094483-1-tina.zhang@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20221116051544.26540-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 48cdcd0a5cf3..996a8b5ee5ee 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -959,11 +959,9 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (domain_use_first_level(domain)) { - pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US; - if (iommu_is_dma_domain(&domain->domain)) - pteval |= DMA_FL_PTE_ACCESS; - } + if (domain_use_first_level(domain)) + pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); From 7fc961cf7ffcb130c4e93ee9a5628134f9de700a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Wed, 16 Nov 2022 13:15:44 +0800 Subject: [PATCH 476/963] iommu/vt-d: Set SRE bit only when hardware has SRS cap SRS cap is the hardware cap telling if the hardware IOMMU can support requests seeking supervisor privilege or not. SRE bit in scalable-mode PASID table entry is treated as Reserved(0) for implementation not supporting SRS cap. Checking SRS cap before setting SRE bit can avoid the non-recoverable fault of "Non-zero reserved field set in PASID Table Entry" caused by setting SRE bit while there is no SRS cap support. The fault messages look like below: DMAR: DRHD: handling fault status reg 2 DMAR: [DMA Read NO_PASID] Request device [00:0d.0] fault addr 0x1154e1000 [fault reason 0x5a] SM: Non-zero reserved field set in PASID Table Entry Fixes: 6f7db75e1c46 ("iommu/vt-d: Add second level page table interface") Cc: stable@vger.kernel.org Signed-off-by: Tina Zhang Link: https://lore.kernel.org/r/20221115070346.1112273-1-tina.zhang@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20221116051544.26540-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c30ddac40ee5..e13d7e5273e1 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -642,7 +642,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). */ - if (pasid != PASID_RID2PASID) + if (pasid != PASID_RID2PASID && ecap_srs(iommu->ecap)) pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); @@ -685,7 +685,8 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu, * We should set SRE bit as well since the addresses are expected * to be GPAs. */ - pasid_set_sre(pte); + if (ecap_srs(iommu->ecap)) + pasid_set_sre(pte); pasid_set_present(pte); spin_unlock(&iommu->lock); From f391d6ee002ea022c62dc0b09d0578f3ccce81be Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Nov 2022 14:48:00 +0300 Subject: [PATCH 477/963] cifs: Use after free in debug code This debug code dereferences "old_iface" after it was already freed by the call to release_iface(). Re-order the debugging to avoid this issue. Fixes: b54034a73baf ("cifs: during reconnect, update interface if necessary") Cc: stable@vger.kernel.org # 5.19+ Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/sess.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 92e4278ec35d..9e7d9f0baa18 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -302,14 +302,14 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) /* now drop the ref to the current iface */ if (old_iface && iface) { - kref_put(&old_iface->refcount, release_iface); cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n", &old_iface->sockaddr, &iface->sockaddr); - } else if (old_iface) { kref_put(&old_iface->refcount, release_iface); + } else if (old_iface) { cifs_dbg(FYI, "releasing ref to iface: %pIS\n", &old_iface->sockaddr); + kref_put(&old_iface->refcount, release_iface); } else { WARN_ON(!iface); cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr); From 07e06193ead86d4812f431b4d87bbd4161222e3f Mon Sep 17 00:00:00 2001 From: Derek Nguyen Date: Thu, 10 Nov 2022 13:21:08 -0600 Subject: [PATCH 478/963] hwmon: (ltc2947) fix temperature scaling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LTC2947 datasheet (Rev. B) calls out in the section "Register Description: Non-Accumulated Result Registers" (pg. 30) that "To calculate temperature, multiply the TEMP register value by 0.204°C and add 5.5°C". Fix to add 5.5C and not 0.55C. Fixes: 9f90fd652bed ("hwmon: Add support for ltc2947") Signed-off-by: Derek Nguyen Signed-off-by: Brandon Maier Link: https://lore.kernel.org/r/20221110192108.20624-1-brandon.maier@collins.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2947-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } From b8d27d2ce8dfc207e4b67b929a86f2be76fbc6ef Mon Sep 17 00:00:00 2001 From: Ninad Malwade Date: Tue, 8 Nov 2022 12:45:08 +0800 Subject: [PATCH 479/963] hwmon: (ina3221) Fix shunt sum critical calculation The shunt sum critical limit register value should be left shifted by one bit as its LSB-0 is a reserved bit. Fixes: 2057bdfb7184 ("hwmon: (ina3221) Add summation feature support") Signed-off-by: Ninad Malwade Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/20221108044508.23463-1-nmalwade@nvidia.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ina3221.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; From 3b7f98f237528c496ea0b689bace0e35eec3e060 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 12 Nov 2022 20:56:06 +0800 Subject: [PATCH 480/963] hwmon: (i5500_temp) fix missing pci_disable_device() pci_disable_device() need be called while module exiting, switch to use pcim_enable(), pci_disable_device() will be called in pcim_release(). Fixes: ada072816be1 ("hwmon: (i5500_temp) New driver for the Intel 5500/5520/X58 chipsets") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221112125606.3751430-1-yangyingliang@huawei.com Signed-off-by: Guenter Roeck --- drivers/hwmon/i5500_temp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; From e2a87785aab0dac190ac89be6a9ba955e2c634f2 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 17 Nov 2022 11:44:23 +0800 Subject: [PATCH 481/963] hwmon: (ibmpex) Fix possible UAF when ibmpex_register_bmc() fails Smatch report warning as follows: drivers/hwmon/ibmpex.c:509 ibmpex_register_bmc() warn: '&data->list' not removed from list If ibmpex_find_sensors() fails in ibmpex_register_bmc(), data will be freed, but data->list will not be removed from driver_data.bmc_data, then list traversal may cause UAF. Fix by removeing it from driver_data.bmc_data before free(). Fixes: 57c7c3a0fdea ("hwmon: IBM power meter driver") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221117034423.2935739-1-cuigaosheng1@huawei.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ibmpex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); From 94eedf3dded5fb472ce97bfaf3ac1c6c29c35d26 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 17 Nov 2022 21:42:49 -0500 Subject: [PATCH 482/963] tracing: Fix race where eprobes can be called before the event The flag that tells the event to call its triggers after reading the event is set for eprobes after the eprobe is enabled. This leads to a race where the eprobe may be triggered at the beginning of the event where the record information is NULL. The eprobe then dereferences the NULL record causing a NULL kernel pointer bug. Test for a NULL record to keep this from happening. Link: https://lore.kernel.org/linux-trace-kernel/20221116192552.1066630-1-rafaelmendsr@gmail.com/ Link: https://lore.kernel.org/linux-trace-kernel/20221117214249.2addbe10@gandalf.local.home Cc: Linux Trace Kernel Cc: Tzvetomir Stoyanov Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Acked-by: Masami Hiramatsu (Google) Reported-by: Rafael Mendonca Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 5dd0617e5df6..9cda9a38422c 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -563,6 +563,9 @@ static void eprobe_trigger_func(struct event_trigger_data *data, { struct eprobe_data *edata = data->private_data; + if (unlikely(!rec)) + return; + __eprobe_trace_func(edata, rec); } From eb7081409f94a9a8608593d0fb63a1aa3d6f95d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Nov 2022 16:02:16 -0800 Subject: [PATCH 483/963] Linux 6.1-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 58cd4f5e1c3a..6f846b1f2618 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From e204ead35401af5e120f653a133d54ee2595627e Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 19:37:12 +0800 Subject: [PATCH 484/963] nfc: nfcmrvl: Fix potential memory leak in nfcmrvl_i2c_nci_send() nfcmrvl_i2c_nci_send() will be called by nfcmrvl_nci_send(), and skb should be freed in nfcmrvl_i2c_nci_send(). However, nfcmrvl_nci_send() won't free the skb when it failed for the test_bit(). Free the skb when test_bit() failed. Fixes: b5b3e23e4cac ("NFC: nfcmrvl: add i2c driver") Signed-off-by: Shang XiaoJing Suggested-by: Pavel Machek Signed-off-by: David S. Miller --- drivers/nfc/nfcmrvl/i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index 24436c9e54c9..97600826af69 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -112,8 +112,10 @@ static int nfcmrvl_i2c_nci_send(struct nfcmrvl_private *priv, struct nfcmrvl_i2c_drv_data *drv_data = priv->drv_data; int ret; - if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) + if (test_bit(NFCMRVL_PHY_ERROR, &priv->flags)) { + kfree_skb(skb); return -EREMOTEIO; + } ret = i2c_master_send(drv_data->i2c, skb->data, skb->len); From 614761e1119c994a7f19e4c9f37b1d2d7fe7306e Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 19:37:13 +0800 Subject: [PATCH 485/963] nfc: nxp-nci: Fix potential memory leak in nxp_nci_send() nxp_nci_send() won't free the skb when it failed for the check before write(). As the result, the skb will memleak. Free the skb when the check failed. Fixes: dece45855a8b ("NFC: nxp-nci: Add support for NXP NCI chips") Signed-off-by: Shang XiaoJing Suggested-by: Pavel Machek Signed-off-by: David S. Miller --- drivers/nfc/nxp-nci/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c index 580cb6ecffee..66b198663387 100644 --- a/drivers/nfc/nxp-nci/core.c +++ b/drivers/nfc/nxp-nci/core.c @@ -73,11 +73,15 @@ static int nxp_nci_send(struct nci_dev *ndev, struct sk_buff *skb) struct nxp_nci_info *info = nci_get_drvdata(ndev); int r; - if (!info->phy_ops->write) + if (!info->phy_ops->write) { + kfree_skb(skb); return -EOPNOTSUPP; + } - if (info->mode != NXP_NCI_MODE_NCI) + if (info->mode != NXP_NCI_MODE_NCI) { + kfree_skb(skb); return -EINVAL; + } r = info->phy_ops->write(info->phy_id, skb); if (r < 0) { From 60dcb5ff55e5c5da259a0dcc4c24c842de1abc9d Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 17 Nov 2022 19:37:14 +0800 Subject: [PATCH 486/963] nfc: s3fwrn5: Fix potential memory leak in s3fwrn5_nci_send() s3fwrn5_nci_send() won't free the skb when it failed for the check before s3fwrn5_write(). As the result, the skb will memleak. Free the skb when the check failed. Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Shang XiaoJing Suggested-by: Pavel Machek Signed-off-by: David S. Miller --- drivers/nfc/s3fwrn5/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nfc/s3fwrn5/core.c b/drivers/nfc/s3fwrn5/core.c index 0270e05b68df..aec356880adf 100644 --- a/drivers/nfc/s3fwrn5/core.c +++ b/drivers/nfc/s3fwrn5/core.c @@ -105,6 +105,7 @@ static int s3fwrn5_nci_send(struct nci_dev *ndev, struct sk_buff *skb) mutex_lock(&info->mutex); if (s3fwrn5_get_mode(info) != S3FWRN5_MODE_NCI) { + kfree_skb(skb); mutex_unlock(&info->mutex); return -EINVAL; } From 00a6c36cca760d0b659f894dee728555b193c5e1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 15 Nov 2022 10:46:20 +0000 Subject: [PATCH 487/963] drm/i915/ttm: never purge busy objects In i915_gem_madvise_ioctl() we immediately purge the object is not currently used, like when the mm.pages are NULL. With shmem the pages might still be hanging around or are perhaps swapped out. Similarly with ttm we might still have the pages hanging around on the ttm resource, like with lmem or shmem, but here we need to be extra careful since async unbinds are possible as well as in-progress kernel moves. In i915_ttm_purge() we expect the pipeline-gutting to nuke the ttm resource for us, however if it's busy the memory is only moved to a ghost object, which then leads to broken behaviour when for example clearing the i915_tt->filp, since the actual ttm_tt is still alive and populated, even though it's been moved to the ghost object. When we later destroy the ghost object we hit the following, since the filp is now NULL: [ +0.006982] #PF: supervisor read access in kernel mode [ +0.005149] #PF: error_code(0x0000) - not-present page [ +0.005147] PGD 11631d067 P4D 11631d067 PUD 115972067 PMD 0 [ +0.005676] Oops: 0000 [#1] PREEMPT SMP NOPTI [ +0.012962] Workqueue: events ttm_device_delayed_workqueue [ttm] [ +0.006022] RIP: 0010:i915_ttm_tt_unpopulate+0x3a/0x70 [i915] [ +0.005879] Code: 89 fb 48 85 f6 74 11 8b 55 4c 48 8b 7d 30 45 31 c0 31 c9 e8 18 6a e5 e0 80 7d 60 00 74 20 48 8b 45 68 8b 55 08 4c 89 e7 5b 5d <48> 8b 40 20 83 e2 01 41 5c 89 d1 48 8b 70 30 e9 42 b2 ff ff 4c 89 [ +0.018782] RSP: 0000:ffffc9000bf6fd70 EFLAGS: 00010202 [ +0.005244] RAX: 0000000000000000 RBX: ffff8883e12ae380 RCX: 0000000000000000 [ +0.007150] RDX: 000000008000000e RSI: ffffffff823559b4 RDI: ffff8883e12ae3c0 [ +0.007142] RBP: ffff888103b65d48 R08: 0000000000000001 R09: 0000000000000001 [ +0.007144] R10: 0000000000000001 R11: ffff88829c2c8040 R12: ffff8883e12ae3c0 [ +0.007148] R13: 0000000000000001 R14: ffff888115184140 R15: ffff888115184248 [ +0.007154] FS: 0000000000000000(0000) GS:ffff88844db00000(0000) knlGS:0000000000000000 [ +0.008108] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.005763] CR2: 0000000000000020 CR3: 000000013fdb4004 CR4: 00000000003706e0 [ +0.007152] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.007145] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.007154] Call Trace: [ +0.002459] [ +0.002126] ttm_tt_unpopulate.part.0+0x17/0x70 [ttm] [ +0.005068] ttm_bo_tt_destroy+0x1c/0x50 [ttm] [ +0.004464] ttm_bo_cleanup_memtype_use+0x25/0x40 [ttm] [ +0.005244] ttm_bo_cleanup_refs+0x90/0x2c0 [ttm] [ +0.004721] ttm_bo_delayed_delete+0x235/0x250 [ttm] [ +0.004981] ttm_device_delayed_workqueue+0x13/0x40 [ttm] [ +0.005422] process_one_work+0x248/0x560 [ +0.004028] worker_thread+0x4b/0x390 [ +0.003682] ? process_one_work+0x560/0x560 [ +0.004199] kthread+0xeb/0x120 [ +0.003163] ? kthread_complete_and_exit+0x20/0x20 [ +0.004815] ret_from_fork+0x1f/0x30 v2: - Just use ttm_bo_wait() directly (Niranjana) - Add testcase reference Testcase: igt@gem_madvise@dontneed-evict-race Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Reported-by: Niranjana Vishwanathapura Signed-off-by: Matthew Auld Cc: Andrzej Hajda Cc: Nirmoy Das Cc: # v5.15+ Reviewed-by: Niranjana Vishwanathapura Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20221115104620.120432-1-matthew.auld@intel.com (cherry picked from commit 5524b5e52e08f675116a93296fe5bee60bc43c03) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3d4305eea1aa..0d6d640225fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -612,6 +612,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; From ebbaa4392e36521fb893973d8a0fcb32f3b6d5eb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 14 Nov 2022 14:22:43 +0200 Subject: [PATCH 488/963] drm/i915: Fix warn in intel_display_power_*_domain() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_display_power_*_domain() functions should always warn if a default domain is returned as a fallback, fix this up. Spotted by Ville. Fixes: 979e1b32e0e2 ("drm/i915: Sanitize the port -> DDI/AUX power domain mapping for each platform") Cc: Ville Syrjälä Cc: Jouni Högander Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221114122251.21327-2-imre.deak@intel.com (cherry picked from commit 10b85f0e1d922210ae857afed6d012ec32c4b6cb) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 1e608b9e5055..1a63da28f330 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2434,7 +2434,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - domains->port_start); @@ -2445,7 +2445,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); @@ -2471,7 +2471,7 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2482,7 +2482,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start); From 47894e0fa6a56a42be6a47c767e79cce8125489d Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Wed, 16 Nov 2022 09:55:58 -0800 Subject: [PATCH 489/963] virt/sev-guest: Prevent IV reuse in the SNP guest driver The AMD Secure Processor (ASP) and an SNP guest use a series of AES-GCM keys called VMPCKs to communicate securely with each other. The IV to this scheme is a sequence number that both the ASP and the guest track. Currently, this sequence number in a guest request must exactly match the sequence number tracked by the ASP. This means that if the guest sees an error from the host during a request it can only retry that exact request or disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV reuse, see: "Authentication Failures in NIST version of GCM" - Antoine Joux et al. In order to address this, make handle_guest_request() delete the VMPCK on any non successful return. To allow userspace querying the cert_data length make handle_guest_request() save the number of pages required by the host, then have handle_guest_request() retry the request without requesting the extended data, then return the number of pages required back to userspace. [ bp: Massage, incorporate Tom's review comments. ] Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver") Reported-by: Peter Gonda Signed-off-by: Peter Gonda Signed-off-by: Borislav Petkov Reviewed-by: Tom Lendacky Cc: stable@kernel.org Link: https://lore.kernel.org/r/20221116175558.2373112-1-pgonda@google.com --- drivers/virt/coco/sev-guest/sev-guest.c | 84 ++++++++++++++++++++----- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index f422f9c58ba7..1ea6d2e5b218 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev) return true; } +/* + * If an error is received from the host or AMD Secure Processor (ASP) there + * are two options. Either retry the exact same encrypted request or discontinue + * using the VMPCK. + * + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to + * encrypt the requests. The IV for this scheme is the sequence number. GCM + * cannot tolerate IV reuse. + * + * The ASP FW v1.51 only increments the sequence numbers on a successful + * guest<->ASP back and forth and only accepts messages at its exact sequence + * number. + * + * So if the sequence number were to be reused the encryption scheme is + * vulnerable. If the sequence number were incremented for a fresh IV the ASP + * will reject the request. + */ static void snp_disable_vmpck(struct snp_guest_dev *snp_dev) { + dev_alert(snp_dev->dev, "Disabling vmpck_id %d to prevent IV reuse.\n", + vmpck_id); memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN); snp_dev->vmpck = NULL; } @@ -321,34 +340,71 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (rc) return rc; - /* Call firmware to process the request */ + /* + * Call firmware to process the request. In this function the encrypted + * message enters shared memory with the host. So after this call the + * sequence number must be incremented or the VMPCK must be deleted to + * prevent reuse of the IV. + */ rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * If the extended guest request fails due to having too small of a + * certificate data buffer, retry the same guest request without the + * extended data request in order to increment the sequence number + * and thus avoid IV reuse. + */ + if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST && + err == SNP_GUEST_REQ_INVALID_LEN) { + const unsigned int certs_npages = snp_dev->input.data_npages; + + exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + /* + * If this call to the firmware succeeds, the sequence number can + * be incremented allowing for continued use of the VMPCK. If + * there is an error reflected in the return value, this value + * is checked further down and the result will be the deletion + * of the VMPCK and the error code being propagated back to the + * user as an ioctl() return code. + */ + rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + err = SNP_GUEST_REQ_INVALID_LEN; + snp_dev->input.data_npages = certs_npages; + } + if (fw_err) *fw_err = err; - if (rc) - return rc; + if (rc) { + dev_alert(snp_dev->dev, + "Detected error from ASP request. rc: %d, fw_err: %llu\n", + rc, *fw_err); + goto disable_vmpck; + } - /* - * The verify_and_dec_payload() will fail only if the hypervisor is - * actively modifying the message header or corrupting the encrypted payload. - * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that - * the key cannot be used for any communication. The key is disabled to ensure - * that AES-GCM does not use the same IV while encrypting the request payload. - */ rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz); if (rc) { dev_alert(snp_dev->dev, - "Detected unexpected decode failure, disabling the vmpck_id %d\n", - vmpck_id); - snp_disable_vmpck(snp_dev); - return rc; + "Detected unexpected decode failure from ASP. rc: %d\n", + rc); + goto disable_vmpck; } /* Increment to new message sequence after payload decryption was successful. */ snp_inc_msg_seqno(snp_dev); return 0; + +disable_vmpck: + snp_disable_vmpck(snp_dev); + return rc; } static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg) From f92a4b50f0bd7fd52391dc4bb9a309085d278f91 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 16:11:34 +0800 Subject: [PATCH 490/963] Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work() In the error path of vmbus_device_register(), device_unregister() is called, which calls vmbus_device_release(). The latter frees the struct hv_device that was passed in to vmbus_device_register(). So remove the kfree() in vmbus_add_channel_work() to avoid a double free. Fixes: c2e5df616e1a ("vmbus: add per-channel sysfs info") Suggested-by: Michael Kelley Signed-off-by: Yang Yingliang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221119081135.1564691-2-yangyingliang@huawei.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b120402d405..cc23b90cae02 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -533,13 +533,17 @@ static void vmbus_add_channel_work(struct work_struct *work) * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() * method. + * + * If vmbus_device_register() fails, the 'device_obj' is freed in + * vmbus_device_release() as called by device_unregister() in the + * error path of vmbus_device_register(). In the outside error + * path, there's no need to free it. */ ret = vmbus_device_register(newchannel->device_obj); if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); - kfree(newchannel->device_obj); goto err_deq_chan; } From 25c94b051592c010abe92c85b0485f1faedc83f3 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 16:11:35 +0800 Subject: [PATCH 491/963] Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register() If device_register() returns error in vmbus_device_register(), the name allocated by dev_set_name() must be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(). Fixes: 09d50ff8a233 ("Staging: hv: make the Hyper-V virtual bus code build") Signed-off-by: Yang Yingliang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20221119081135.1564691-3-yangyingliang@huawei.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..e592c481f7ae 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2082,6 +2082,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) ret = device_register(&child_device_obj->device); if (ret) { pr_err("Unable to register child device\n"); + put_device(&child_device_obj->device); return ret; } From 19d04a947db53e0d99e60aeb914d2148f61ab5f9 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Thu, 17 Nov 2022 23:07:22 +0800 Subject: [PATCH 492/963] net: microchip: sparx5: Fix return value in sparx5_tc_setup_qdisc_ets() Function sparx5_tc_setup_qdisc_ets() always returns negative value because it return -EOPNOTSUPP in the end. This patch returns the rersult of sparx5_tc_ets_add() and sparx5_tc_ets_del() directly. Fixes: 211225428d65 ("net: microchip: sparx5: add support for offloading ets qdisc") Signed-off-by: Lu Wei Reviewed-by: Daniel Machon Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/sparx5/sparx5_tc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c index e05429c751ee..dc2c3756e3a2 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_tc.c @@ -90,13 +90,10 @@ static int sparx5_tc_setup_qdisc_ets(struct net_device *ndev, } } - sparx5_tc_ets_add(port, params); - break; + return sparx5_tc_ets_add(port, params); case TC_ETS_DESTROY: - sparx5_tc_ets_del(port); - - break; + return sparx5_tc_ets_del(port); case TC_ETS_GRAFT: return -EOPNOTSUPP; From 83f638bca0ccd94942bc3c4eb9bcec24dd8a1cf9 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 493/963] LoongArch: Makefile: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E Fix this up by changing the LoongArch Makefile to use "grep -E" instead. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index f4cb54d5afd6..01b57b726322 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -97,7 +97,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_LOONGARCH CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ + grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif From 538eafc6deae12fbac5f277b89aa139b812bca49 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 494/963] LoongArch: Combine acpi_boot_table_init() and acpi_boot_init() Combine acpi_boot_table_init() and acpi_boot_init() since they are very simple, and we don't need to check the return value of acpi_boot_init(). Signed-off-by: Huacai Chen --- arch/loongarch/kernel/acpi.c | 31 ++++++++++--------------------- arch/loongarch/kernel/setup.c | 1 - 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c index 335398482038..8319cc409009 100644 --- a/arch/loongarch/kernel/acpi.c +++ b/arch/loongarch/kernel/acpi.c @@ -56,23 +56,6 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) return ioremap_cache(phys, size); } -void __init acpi_boot_table_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return; - - /* - * Initialize the ACPI boot-time table parser. - */ - if (acpi_table_init()) { - disable_acpi(); - return; - } -} - #ifdef CONFIG_SMP static int set_processor_mask(u32 id, u32 flags) { @@ -156,13 +139,21 @@ static void __init acpi_process_madt(void) loongson_sysconf.nr_cpus = num_processors; } -int __init acpi_boot_init(void) +void __init acpi_boot_table_init(void) { /* * If acpi_disabled, bail out */ if (acpi_disabled) - return -1; + return; + + /* + * Initialize the ACPI boot-time table parser. + */ + if (acpi_table_init()) { + disable_acpi(); + return; + } loongson_sysconf.boot_cpu_id = read_csr_cpuid(); @@ -173,8 +164,6 @@ int __init acpi_boot_init(void) /* Do not enable ACPI SPCR console by default */ acpi_parse_spcr(earlycon_acpi_spcr_enable, false); - - return 0; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 1eb63fa9bc81..ae436def7ee9 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -257,7 +257,6 @@ void __init platform_init(void) #ifdef CONFIG_ACPI acpi_gbl_use_default_register_widths = false; acpi_boot_table_init(); - acpi_boot_init(); #endif #ifdef CONFIG_NUMA From c56ab8e85d6de5c2c5ba37e7a5698b2eb0c80ef5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 495/963] LoongArch: SMP: Change prefix from loongson3 to loongson SMP operations can be shared by Loongson-2 series and Loongson-3 series, so we change the prefix from loongson3 to loongson for all functions and data structures. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/irq.h | 2 +- arch/loongarch/include/asm/smp.h | 30 +++++++++++----------- arch/loongarch/kernel/irq.c | 2 +- arch/loongarch/kernel/smp.c | 44 ++++++++++++++++---------------- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..5332b1433f38 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -117,7 +117,7 @@ extern struct fwnode_handle *liointc_handle; extern struct fwnode_handle *pch_lpc_handle; extern struct fwnode_handle *pch_pic_handle[MAX_IO_PICS]; -extern irqreturn_t loongson3_ipi_interrupt(int irq, void *dev); +extern irqreturn_t loongson_ipi_interrupt(int irq, void *dev); #include diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 71189b28bfb2..3dd172d9ffea 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -19,21 +19,21 @@ extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; extern cpumask_t cpu_foreign_map[]; -void loongson3_smp_setup(void); -void loongson3_prepare_cpus(unsigned int max_cpus); -void loongson3_boot_secondary(int cpu, struct task_struct *idle); -void loongson3_init_secondary(void); -void loongson3_smp_finish(void); -void loongson3_send_ipi_single(int cpu, unsigned int action); -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action); +void loongson_smp_setup(void); +void loongson_prepare_cpus(unsigned int max_cpus); +void loongson_boot_secondary(int cpu, struct task_struct *idle); +void loongson_init_secondary(void); +void loongson_smp_finish(void); +void loongson_send_ipi_single(int cpu, unsigned int action); +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action); #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void); -void loongson3_cpu_die(unsigned int cpu); +int loongson_cpu_disable(void); +void loongson_cpu_die(unsigned int cpu); #endif static inline void plat_smp_setup(void) { - loongson3_smp_setup(); + loongson_smp_setup(); } static inline int raw_smp_processor_id(void) @@ -85,28 +85,28 @@ extern void show_ipi_list(struct seq_file *p, int prec); */ static inline void smp_send_reschedule(int cpu) { - loongson3_send_ipi_single(cpu, SMP_RESCHEDULE); + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); } static inline void arch_send_call_function_single_ipi(int cpu) { - loongson3_send_ipi_single(cpu, SMP_CALL_FUNCTION); + loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - loongson3_send_ipi_mask(mask, SMP_CALL_FUNCTION); + loongson_send_ipi_mask(mask, SMP_CALL_FUNCTION); } #ifdef CONFIG_HOTPLUG_CPU static inline int __cpu_disable(void) { - return loongson3_cpu_disable(); + return loongson_cpu_disable(); } static inline void __cpu_die(unsigned int cpu) { - loongson3_cpu_die(cpu); + loongson_cpu_die(cpu); } extern void play_dead(void); diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c index 1ba19c76563e..0524bf1169b7 100644 --- a/arch/loongarch/kernel/irq.c +++ b/arch/loongarch/kernel/irq.c @@ -117,7 +117,7 @@ void __init init_IRQ(void) if (ipi_irq < 0) panic("IPI IRQ mapping failed\n"); irq_set_percpu_devid(ipi_irq); - r = request_percpu_irq(ipi_irq, loongson3_ipi_interrupt, "IPI", &ipi_dummy_dev); + r = request_percpu_irq(ipi_irq, loongson_ipi_interrupt, "IPI", &ipi_dummy_dev); if (r < 0) panic("IPI IRQ request failed\n"); #endif diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 781a4d4bdddc..6ed72f7ff278 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -136,12 +136,12 @@ static void ipi_write_action(int cpu, u32 action) } } -void loongson3_send_ipi_single(int cpu, unsigned int action) +void loongson_send_ipi_single(int cpu, unsigned int action) { ipi_write_action(cpu_logical_map(cpu), (u32)action); } -void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) +void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) { unsigned int i; @@ -149,7 +149,7 @@ void loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } -irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) +irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; unsigned int cpu = smp_processor_id(); @@ -169,7 +169,7 @@ irqreturn_t loongson3_ipi_interrupt(int irq, void *dev) return IRQ_HANDLED; } -void __init loongson3_smp_setup(void) +void __init loongson_smp_setup(void) { cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package; cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -178,7 +178,7 @@ void __init loongson3_smp_setup(void) pr_info("Detected %i available CPU(s)\n", loongson_sysconf.nr_cpus); } -void __init loongson3_prepare_cpus(unsigned int max_cpus) +void __init loongson_prepare_cpus(unsigned int max_cpus) { int i = 0; @@ -193,7 +193,7 @@ void __init loongson3_prepare_cpus(unsigned int max_cpus) /* * Setup the PC, SP, and TP of a secondary processor and start it running! */ -void loongson3_boot_secondary(int cpu, struct task_struct *idle) +void loongson_boot_secondary(int cpu, struct task_struct *idle) { unsigned long entry; @@ -205,13 +205,13 @@ void loongson3_boot_secondary(int cpu, struct task_struct *idle) csr_mail_send(entry, cpu_logical_map(cpu), 0); - loongson3_send_ipi_single(cpu, SMP_BOOT_CPU); + loongson_send_ipi_single(cpu, SMP_BOOT_CPU); } /* * SMP init and finish on secondary CPUs */ -void loongson3_init_secondary(void) +void loongson_init_secondary(void) { unsigned int cpu = smp_processor_id(); unsigned int imask = ECFGF_IP0 | ECFGF_IP1 | ECFGF_IP2 | @@ -231,7 +231,7 @@ void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; } -void loongson3_smp_finish(void) +void loongson_smp_finish(void) { local_irq_enable(); iocsr_write64(0, LOONGARCH_IOCSR_MBUF0); @@ -240,7 +240,7 @@ void loongson3_smp_finish(void) #ifdef CONFIG_HOTPLUG_CPU -int loongson3_cpu_disable(void) +int loongson_cpu_disable(void) { unsigned long flags; unsigned int cpu = smp_processor_id(); @@ -262,7 +262,7 @@ int loongson3_cpu_disable(void) return 0; } -void loongson3_cpu_die(unsigned int cpu) +void loongson_cpu_die(unsigned int cpu) { while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); @@ -300,19 +300,19 @@ void play_dead(void) */ #ifdef CONFIG_PM -static int loongson3_ipi_suspend(void) +static int loongson_ipi_suspend(void) { return 0; } -static void loongson3_ipi_resume(void) +static void loongson_ipi_resume(void) { iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_EN); } -static struct syscore_ops loongson3_ipi_syscore_ops = { - .resume = loongson3_ipi_resume, - .suspend = loongson3_ipi_suspend, +static struct syscore_ops loongson_ipi_syscore_ops = { + .resume = loongson_ipi_resume, + .suspend = loongson_ipi_suspend, }; /* @@ -321,7 +321,7 @@ static struct syscore_ops loongson3_ipi_syscore_ops = { */ static int __init ipi_pm_init(void) { - register_syscore_ops(&loongson3_ipi_syscore_ops); + register_syscore_ops(&loongson_ipi_syscore_ops); return 0; } @@ -425,7 +425,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) { init_new_context(current, &init_mm); current_thread_info()->cpu = 0; - loongson3_prepare_cpus(max_cpus); + loongson_prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); calculate_cpu_foreign_map(); @@ -436,7 +436,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - loongson3_boot_secondary(cpu, tidle); + loongson_boot_secondary(cpu, tidle); /* Wait for CPU to start and be ready to sync counters */ if (!wait_for_completion_timeout(&cpu_starting, @@ -465,7 +465,7 @@ asmlinkage void start_secondary(void) cpu_probe(); constant_clockevent_init(); - loongson3_init_secondary(); + loongson_init_secondary(); set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); @@ -487,11 +487,11 @@ asmlinkage void start_secondary(void) complete(&cpu_running); /* - * irq will be enabled in loongson3_smp_finish(), enabling it too + * irq will be enabled in loongson_smp_finish(), enabling it too * early is dangerous. */ WARN_ON_ONCE(!irqs_disabled()); - loongson3_smp_finish(); + loongson_smp_finish(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } From e428e9613531d1ef6bd0d91352899712b29134fb Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 496/963] LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be considered as a fpu owner and kernel try to save its FPU/SIMD context and cause such errors: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread(). Cc: stable@vger.kernel.org Reported-by: Qi Hu Signed-off-by: Huacai Chen --- arch/loongarch/kernel/process.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2526b68f1c0f..ddb8ba4eb399 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -152,7 +152,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->csr_crmd = p->thread.csr_crmd; childregs->csr_prmd = p->thread.csr_prmd; childregs->csr_ecfg = p->thread.csr_ecfg; - return 0; + goto out; } /* user thread */ @@ -171,14 +171,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) */ childregs->csr_euen = 0; + if (clone_flags & CLONE_SETTLS) + childregs->regs[2] = tls; + +out: clear_tsk_thread_flag(p, TIF_USEDFPU); clear_tsk_thread_flag(p, TIF_USEDSIMD); clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE); clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE); - if (clone_flags & CLONE_SETTLS) - childregs->regs[2] = tls; - return 0; } From bf2f34a506e66e2979de6b17c337c5d4b25b4d2c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 497/963] LoongArch: Set _PAGE_DIRTY only if _PAGE_WRITE is set in {pmd,pte}_mkdirty() Now {pmd,pte}_mkdirty() set _PAGE_DIRTY bit unconditionally, this causes random segmentation fault after commit 0ccf7f168e17bb7e ("mm/thp: carry over dirty bit when thp splits on pmd"). The reason is: when fork(), parent process use pmd_wrprotect() to clear huge page's _PAGE_WRITE and _PAGE_DIRTY (for COW); then pte_mkdirty() set _PAGE_DIRTY as well as _PAGE_MODIFIED while splitting dirty huge pages; once _PAGE_DIRTY is set, there will be no tlb modify exception so the COW machanism fails; and at last memory corruption occurred between parent and child processes. So, we should set _PAGE_DIRTY only when _PAGE_WRITE is set in {pmd,pte}_ mkdirty(). Cc: stable@vger.kernel.org Cc: Peter Xu Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..debbe116f105 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -349,7 +349,9 @@ static inline pte_t pte_mkclean(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pte_val(pte) |= _PAGE_MODIFIED; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -478,7 +480,9 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_DIRTY | _PAGE_MODIFIED); + pmd_val(pmd) |= _PAGE_MODIFIED; + if (pmd_val(pmd) & _PAGE_WRITE) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } From 54e6cd42a183b602e3627ad3aaeeed44f7443e67 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 498/963] LoongArch: Set _PAGE_DIRTY only if _PAGE_MODIFIED is set in {pmd,pte}_mkwrite() Set _PAGE_DIRTY only if _PAGE_MODIFIED is set in {pmd,pte}_mkwrite(). Otherwise, _PAGE_DIRTY silences the TLB modify exception and make us have no chance to mark a pmd/pte dirty (_PAGE_MODIFIED) for software. Reviewed-by: Guo Ren Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgtable.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index debbe116f105..aa0e0e0d4ee5 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -357,7 +357,9 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= (_PAGE_WRITE | _PAGE_DIRTY); + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_MODIFIED) + pte_val(pte) |= _PAGE_DIRTY; return pte; } @@ -457,7 +459,9 @@ static inline int pmd_write(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= (_PAGE_WRITE | _PAGE_DIRTY); + pmd_val(pmd) |= _PAGE_WRITE; + if (pmd_val(pmd) & _PAGE_MODIFIED) + pmd_val(pmd) |= _PAGE_DIRTY; return pmd; } From b96e74bb439f096168c78ba3ba1599e0b85cfd73 Mon Sep 17 00:00:00 2001 From: KaiLong Wang Date: Mon, 21 Nov 2022 19:02:57 +0800 Subject: [PATCH 499/963] LoongArch: Fix unsigned comparison with less than zero Eliminate the following coccicheck warning: ./arch/loongarch/kernel/unwind_prologue.c:84:5-13: WARNING: Unsigned expression compared with zero: frame_ra < 0 Signed-off-by: KaiLong Wang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/unwind_prologue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index b206d9159205..4571c3c87cd4 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -43,7 +43,8 @@ static bool unwind_by_prologue(struct unwind_state *state) { struct stack_info *info = &state->stack_info; union loongarch_instruction *ip, *ip_end; - unsigned long frame_size = 0, frame_ra = -1; + long frame_ra = -1; + unsigned long frame_size = 0; unsigned long size, offset, pc = state->pc; if (state->sp >= info->end || state->sp < info->begin) From bd5e1e42826f18147afb0ba07e6a815f52cf8bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20D=C3=ADaz?= Date: Thu, 17 Nov 2022 21:44:21 -0600 Subject: [PATCH 500/963] selftests/net: Find nettest in current directory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `nettest` binary, built from `selftests/net/nettest.c`, was expected to be found in the path during test execution of `fcnal-test.sh` and `pmtu.sh`, leading to tests getting skipped when the binary is not installed in the system, as can be seen in these logs found in the wild [1]: # TEST: vti4: PMTU exceptions [SKIP] [ 350.600250] IPv6: ADDRCONF(NETDEV_CHANGE): veth_b: link becomes ready [ 350.607421] IPv6: ADDRCONF(NETDEV_CHANGE): veth_a: link becomes ready # 'nettest' command not found; skipping tests # xfrm6udp not supported # TEST: vti6: PMTU exceptions (ESP-in-UDP) [SKIP] [ 351.605102] IPv6: ADDRCONF(NETDEV_CHANGE): veth_b: link becomes ready [ 351.612243] IPv6: ADDRCONF(NETDEV_CHANGE): veth_a: link becomes ready # 'nettest' command not found; skipping tests # xfrm4udp not supported The `unicast_extensions.sh` tests also rely on `nettest`, but it runs fine there because it looks for the binary in the current working directory [2]: The same mechanism that works for the Unicast extensions tests is here copied over to the PMTU and functional tests. [1] https://lkft.validation.linaro.org/scheduler/job/5839508#L6221 [2] https://lkft.validation.linaro.org/scheduler/job/5839508#L7958 Signed-off-by: Daniel Díaz Signed-off-by: David S. Miller --- tools/testing/selftests/net/fcnal-test.sh | 11 +++++++---- tools/testing/selftests/net/pmtu.sh | 10 ++++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 31c3b6ebd388..21ca91473c09 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4196,10 +4196,13 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -which nettest >/dev/null -if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit $ksft_skip + fi fi declare -i nfail=0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 736e358dc549..dfe3d287f01d 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -686,10 +686,12 @@ setup_xfrm() { } setup_nettest_xfrm() { - which nettest >/dev/null - if [ $? -ne 0 ]; then - echo "'nettest' command not found; skipping tests" - return 1 + if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + return 1 + fi fi [ ${1} -eq 6 ] && proto="-6" || proto="" From 764f8485890d4988a2083f5dea90cfe0116b25f6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 20:21:52 -0800 Subject: [PATCH 501/963] ipv4/fib: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated[1] and are being replaced with flexible array members in support of the ongoing efforts to tighten the FORTIFY_SOURCE routines on memcpy(), correctly instrument array indexing with UBSAN_BOUNDS, and to globally enable -fstrict-flex-arrays=3. Replace zero-length array with flexible-array member in struct key_vector. This results in no differences in binary output. [1] https://github.com/KSPP/linux/issues/78 Cc: Jakub Kicinski Cc: "David S. Miller" Cc: Hideaki YOSHIFUJI Cc: David Ahern Cc: Eric Dumazet Cc: Paolo Abeni Cc: "Gustavo A. R. Silva" Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Gustavo A. R. Silva Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 452ff177e4da..c88bf856c443 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -126,7 +126,7 @@ struct key_vector { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ - struct key_vector __rcu *tnode[0]; + DECLARE_FLEX_ARRAY(struct key_vector __rcu *, tnode); }; }; From badbda1a01860c80c6ab60f329ef46c713653a27 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Nov 2022 18:07:54 +0300 Subject: [PATCH 502/963] octeontx2-af: cn10k: mcs: Fix copy and paste bug in mcs_bbe_intr_handler() This code accidentally uses the RX macro twice instead of the RX and TX. Fixes: 6c635f78c474 ("octeontx2-af: cn10k: mcs: Handle MCS block interrupts") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/mcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c index 4a343f853b28..c0bedf402da9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs.c @@ -951,7 +951,7 @@ static void mcs_bbe_intr_handler(struct mcs *mcs, u64 intr, enum mcs_direction d else event.intr_mask = (dir == MCS_RX) ? MCS_BBE_RX_PLFIFO_OVERFLOW_INT : - MCS_BBE_RX_PLFIFO_OVERFLOW_INT; + MCS_BBE_TX_PLFIFO_OVERFLOW_INT; /* Notify the lmac_id info which ran into BBE fatal error */ event.lmac_id = i & 0x3ULL; From aaa65d17eec372c6a9756833f3964ba05b05ea14 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 15 Nov 2022 11:17:05 -0800 Subject: [PATCH 503/963] x86/tsx: Add a feature bit for TSX control MSR support Support for the TSX control MSR is enumerated in MSR_IA32_ARCH_CAPABILITIES. This is different from how other CPU features are enumerated i.e. via CPUID. Currently, a call to tsx_ctrl_is_supported() is required for enumerating the feature. In the absence of a feature bit for TSX control, any code that relies on checking feature bits directly will not work. In preparation for adding a feature bit check in MSR save/restore during suspend/resume, set a new feature bit X86_FEATURE_TSX_CTRL when MSR_IA32_TSX_CTRL is present. Also make tsx_ctrl_is_supported() use the new feature bit to avoid any overhead of reading the MSR. [ bp: Remove tsx_ctrl_is_supported(), add room for two more feature bits in word 11 which are coming up in the next merge window. ] Suggested-by: Andrew Cooper Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/de619764e1d98afbb7a5fa58424f1278ede37b45.1668539735.git.pawan.kumar.gupta@linux.intel.com --- arch/x86/include/asm/cpufeatures.h | 3 +++ arch/x86/kernel/cpu/tsx.c | 38 +++++++++++++----------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index b71f4f2ecdd5..b2da7cb64b31 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,6 +305,9 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ + +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index ec7bbac3a9f2..8009c8346d8f 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -58,24 +58,6 @@ static void tsx_enable(void) wrmsrl(MSR_IA32_TSX_CTRL, tsx); } -static bool tsx_ctrl_is_supported(void) -{ - u64 ia32_cap = x86_read_arch_cap_msr(); - - /* - * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this - * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. - * - * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a - * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES - * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get - * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, - * tsx= cmdline requests will do nothing on CPUs without - * MSR_IA32_TSX_CTRL support. - */ - return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); -} - static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) { if (boot_cpu_has_bug(X86_BUG_TAA)) @@ -135,7 +117,7 @@ static void tsx_clear_cpuid(void) rdmsrl(MSR_TSX_FORCE_ABORT, msr); msr |= MSR_TFA_TSX_CPUID_CLEAR; wrmsrl(MSR_TSX_FORCE_ABORT, msr); - } else if (tsx_ctrl_is_supported()) { + } else if (cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL)) { rdmsrl(MSR_IA32_TSX_CTRL, msr); msr |= TSX_CTRL_CPUID_CLEAR; wrmsrl(MSR_IA32_TSX_CTRL, msr); @@ -158,7 +140,8 @@ static void tsx_dev_mode_disable(void) u64 mcu_opt_ctrl; /* Check if RTM_ALLOW exists */ - if (!boot_cpu_has_bug(X86_BUG_TAA) || !tsx_ctrl_is_supported() || + if (!boot_cpu_has_bug(X86_BUG_TAA) || + !cpu_feature_enabled(X86_FEATURE_MSR_TSX_CTRL) || !cpu_feature_enabled(X86_FEATURE_SRBDS_CTRL)) return; @@ -191,7 +174,20 @@ void __init tsx_init(void) return; } - if (!tsx_ctrl_is_supported()) { + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + if (x86_read_arch_cap_msr() & ARCH_CAP_TSX_CTRL_MSR) { + setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL); + } else { tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED; return; } From 50bcceb7724e471d9b591803889df45dcbb584bc Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Tue, 15 Nov 2022 11:17:06 -0800 Subject: [PATCH 504/963] x86/pm: Add enumeration check before spec MSRs save/restore setup pm_save_spec_msr() keeps a list of all the MSRs which _might_ need to be saved and restored at hibernate and resume. However, it has zero awareness of CPU support for these MSRs. It mostly works by unconditionally attempting to manipulate these MSRs and relying on rdmsrl_safe() being able to handle a #GP on CPUs where the support is unavailable. However, it's possible for reads (RDMSR) to be supported for a given MSR while writes (WRMSR) are not. In this case, msr_build_context() sees a successful read (RDMSR) and marks the MSR as valid. Then, later, a write (WRMSR) fails, producing a nasty (but harmless) error message. This causes restore_processor_state() to try and restore it, but writing this MSR is not allowed on the Intel Atom N2600 leading to: unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) \ at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20) Call Trace: restore_processor_state x86_acpi_suspend_lowlevel acpi_suspend_enter suspend_devices_and_enter pm_suspend.cold state_store kernfs_fop_write_iter vfs_write ksys_write do_syscall_64 ? do_syscall_64 ? up_read ? lock_is_held_type ? asm_exc_page_fault ? lockdep_hardirqs_on entry_SYSCALL_64_after_hwframe To fix this, add the corresponding X86_FEATURE bit for each MSR. Avoid trying to manipulate the MSR when the feature bit is clear. This required adding a X86_FEATURE bit for MSRs that do not have one already, but it's a small price to pay. [ bp: Move struct msr_enumeration inside the only function that uses it. ] Fixes: 73924ec4d560 ("x86/pm: Save the MSR validity status at context setup") Reported-by: Hans de Goede Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Reviewed-by: Dave Hansen Acked-by: Rafael J. Wysocki Cc: Link: https://lore.kernel.org/r/c24db75d69df6e66c0465e13676ad3f2837a2ed8.1668539735.git.pawan.kumar.gupta@linux.intel.com --- arch/x86/power/cpu.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 4cd39f304e20..93ae33248f42 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -513,16 +513,23 @@ static int pm_cpu_check(const struct x86_cpu_id *c) static void pm_save_spec_msr(void) { - u32 spec_msr_id[] = { - MSR_IA32_SPEC_CTRL, - MSR_IA32_TSX_CTRL, - MSR_TSX_FORCE_ABORT, - MSR_IA32_MCU_OPT_CTRL, - MSR_AMD64_LS_CFG, - MSR_AMD64_DE_CFG, + struct msr_enumeration { + u32 msr_no; + u32 feature; + } msr_enum[] = { + { MSR_IA32_SPEC_CTRL, X86_FEATURE_MSR_SPEC_CTRL }, + { MSR_IA32_TSX_CTRL, X86_FEATURE_MSR_TSX_CTRL }, + { MSR_TSX_FORCE_ABORT, X86_FEATURE_TSX_FORCE_ABORT }, + { MSR_IA32_MCU_OPT_CTRL, X86_FEATURE_SRBDS_CTRL }, + { MSR_AMD64_LS_CFG, X86_FEATURE_LS_CFG_SSBD }, + { MSR_AMD64_DE_CFG, X86_FEATURE_LFENCE_RDTSC }, }; + int i; - msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); + for (i = 0; i < ARRAY_SIZE(msr_enum); i++) { + if (boot_cpu_has(msr_enum[i].feature)) + msr_build_context(&msr_enum[i].msr_no, 1); + } } static int pm_check_save_msr(void) From c51f0e6a1254b3ac2d308e1c6fd8fb936992b455 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 10:39:44 +0100 Subject: [PATCH 505/963] btrfs: zoned: fix missing endianness conversion in sb_write_pointer generation is an on-disk __le64 value, so use btrfs_super_generation to convert it to host endian before comparing it. Fixes: 12659251ca5d ("btrfs: implement log-structured superblock for ZONED mode") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1912abf6d020..44d8177eeffc 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -134,7 +134,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, super[i] = page_address(page[i]); } - if (super[0]->generation > super[1]->generation) + if (btrfs_super_generation(super[0]) > + btrfs_super_generation(super[1])) sector = zones[1].start; else sector = zones[0].start; From a11452a3709e217492798cf3686ac2cc8eb3fb51 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 15 Nov 2022 16:29:44 +0000 Subject: [PATCH 506/963] btrfs: send: avoid unaligned encoded writes when attempting to clone range When trying to see if we can clone a file range, there are cases where we end up sending two write operations in case the inode from the source root has an i_size that is not sector size aligned and the length from the current offset to its i_size is less than the remaining length we are trying to clone. Issuing two write operations when we could instead issue a single write operation is not incorrect. However it is not optimal, specially if the extents are compressed and the flag BTRFS_SEND_FLAG_COMPRESSED was passed to the send ioctl. In that case we can end up sending an encoded write with an offset that is not sector size aligned, which makes the receiver fallback to decompressing the data and writing it using regular buffered IO (so re-compressing the data in case the fs is mounted with compression enabled), because encoded writes fail with -EINVAL when an offset is not sector size aligned. The following example, which triggered a bug in the receiver code for the fallback logic of decompressing + regular buffer IO and is fixed by the patchset referred in a Link at the bottom of this changelog, is an example where we have the non-optimal behaviour due to an unaligned encoded write: $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV > /dev/null mount -o compress $DEV $MNT # File foo has a size of 33K, not aligned to the sector size. xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar # Now clone the first 32K of file bar into foo at offset 0. xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo # Snapshot the default subvolume and create a full send stream (v2). btrfs subvolume snapshot -r $MNT $MNT/snap btrfs send --compressed-data -f /tmp/test.send $MNT/snap echo -e "\nFile bar in the original filesystem:" od -A d -t x1 $MNT/snap/bar umount $MNT mkfs.btrfs -f $DEV > /dev/null mount $DEV $MNT echo -e "\nReceiving stream in a new filesystem..." btrfs receive -f /tmp/test.send $MNT echo -e "\nFile bar in the new filesystem:" od -A d -t x1 $MNT/snap/bar umount $MNT Before this patch, the send stream included one regular write and one encoded write for file 'bar', with the later being not sector size aligned and causing the receiver to fallback to decompression + buffered writes. The output of the btrfs receive command in verbose mode (-vvv): (...) mkfile o258-7-0 rename o258-7-0 -> bar utimes clone bar - source=foo source offset=0 offset=0 length=32768 write bar - offset=32768 length=1024 encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0 encoded_write bar - falling back to decompress and write due to errno 22 ("Invalid argument") (...) This patch avoids the regular write followed by an unaligned encoded write so that we end up sending a single encoded write that is aligned. So after this patch the stream content is (output of btrfs receive -vvv): (...) mkfile o258-7-0 rename o258-7-0 -> bar utimes clone bar - source=foo source offset=0 offset=0 length=32768 encoded_write bar - offset=32768, len=4096, unencoded_offset=32768, unencoded_file_len=32768, unencoded_len=65536, compression=1, encryption=0 (...) So we get more optimal behaviour and avoid the silent data loss bug in versions of btrfs-progs affected by the bug referred by the Link tag below (btrfs-progs v5.19, v5.19.1, v6.0 and v6.0.1). Link: https://lore.kernel.org/linux-btrfs/cover.1668529099.git.fdmanana@suse.com/ Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 145c84b44fd0..1c4b693ee4a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5702,6 +5702,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, u64 ext_len; u64 clone_len; u64 clone_data_offset; + bool crossed_src_i_size = false; if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(clone_root->root, path); @@ -5759,8 +5760,10 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, if (key.offset >= clone_src_i_size) break; - if (key.offset + ext_len > clone_src_i_size) + if (key.offset + ext_len > clone_src_i_size) { ext_len = clone_src_i_size - key.offset; + crossed_src_i_size = true; + } clone_data_offset = btrfs_file_extent_offset(leaf, ei); if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { @@ -5821,6 +5824,25 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path, ret = send_clone(sctx, offset, clone_len, clone_root); } + } else if (crossed_src_i_size && clone_len < len) { + /* + * If we are at i_size of the clone source inode and we + * can not clone from it, terminate the loop. This is + * to avoid sending two write operations, one with a + * length matching clone_len and the final one after + * this loop with a length of len - clone_len. + * + * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED + * was passed to the send ioctl), this helps avoid + * sending an encoded write for an offset that is not + * sector size aligned, in case the i_size of the source + * inode is not sector size aligned. That will make the + * receiver fallback to decompression of the data and + * writing it using regular buffered IO, therefore while + * not incorrect, it's not optimal due decompression and + * possible re-compression at the receiver. + */ + break; } else { ret = send_extent_data(sctx, dst_path, offset, clone_len); From 64c150339e7f6c5cbbe8c17a56ef2b3902612798 Mon Sep 17 00:00:00 2001 From: Maxim Korotkov Date: Thu, 17 Nov 2022 15:30:34 +0300 Subject: [PATCH 507/963] pinctrl: single: Fix potential division by zero There is a possibility of dividing by zero due to the pcs->bits_per_pin if pcs->fmask() also has a value of zero and called fls from asm-generic/bitops/builtin-fls.h or arch/x86/include/asm/bitops.h. The function pcs_probe() has the branch that assigned to fmask 0 before pcs_allocate_pin_table() was called Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 4e7e8017a80e ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules") Signed-off-by: Maxim Korotkov Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20221117123034.27383-1-korotkov.maxim.s@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { From f7e942b5bb35d8e3af54053d19a6bf04143a3955 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Wed, 16 Nov 2022 22:23:54 +0800 Subject: [PATCH 508/963] btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit() Syzkaller reported BUG as follows: BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 Call Trace: dump_stack_lvl+0xcd/0x134 __might_resched.cold+0x222/0x26b kmem_cache_alloc+0x2e7/0x3c0 update_qgroup_limit_item+0xe1/0x390 btrfs_qgroup_inherit+0x147b/0x1ee0 create_subvol+0x4eb/0x1710 btrfs_mksubvol+0xfe5/0x13f0 __btrfs_ioctl_snap_create+0x2b0/0x430 btrfs_ioctl_snap_create_v2+0x25a/0x520 btrfs_ioctl+0x2a1c/0x5ce0 __x64_sys_ioctl+0x193/0x200 do_syscall_64+0x35/0x80 Fix this by calling qgroup_dirty() on @dstqgroup, and update limit item in btrfs_run_qgroups() later outside of the spinlock context. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: ChenXiaoSong Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 9334c3157c22..b74105a10f16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2951,14 +2951,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid, dstgroup->rsv_rfer = inherit->lim.rsv_rfer; dstgroup->rsv_excl = inherit->lim.rsv_excl; - ret = update_qgroup_limit_item(trans, dstgroup); - if (ret) { - qgroup_mark_inconsistent(fs_info); - btrfs_info(fs_info, - "unable to update quota limit for %llu", - dstgroup->qgroupid); - goto unlock; - } + qgroup_dirty(fs_info, dstgroup); } if (srcid) { From c7aa1a76d4a0a3c401025b60c401412bbb60f8c6 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 28 Sep 2022 14:26:50 -0400 Subject: [PATCH 509/963] netfilter: ipset: regression in ip_set_hash_ip.c This patch introduced a regression: commit 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") The variable e.ip is passed to adtfn() function which finally adds the ip address to the set. The patch above refactored the for loop and moved e.ip = htonl(ip) to the end of the for loop. What this means is that if the value of "ip" changes between the first assignement of e.ip and the forloop, then e.ip is pointing to a different ip address than "ip". Test case: $ ipset create jdtest_tmp hash:ip family inet hashsize 2048 maxelem 100000 $ ipset add jdtest_tmp 10.0.1.1/31 ipset v6.21.1: Element cannot be added to the set: it's already added The value of ip gets updated inside the "else if (tb[IPSET_ATTR_CIDR])" block but e.ip is still pointing to the old value. Fixes: 48596a8ddc46 ("netfilter: ipset: Fix adding an IPv4 range containing more than 2^31 addresses") Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ip.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index dd30c03d5a23..75d556d71652 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -151,18 +151,16 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) return -ERANGE; - if (retried) { + if (retried) ip = ntohl(h->next.ip); - e.ip = htonl(ip); - } for (; ip <= ip_to;) { + e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ip += hosts; - e.ip = htonl(ip); - if (e.ip == 0) + if (ip == 0) return 0; ret = 0; From 839a973988a94c15002cbd81536e4af6ced2bd30 Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Mon, 21 Nov 2022 14:53:43 +0000 Subject: [PATCH 510/963] clocksource/drivers/arm_arch_timer: Fix XGene-1 TVAL register math error The TVAL register is 32 bit signed. Thus only the lower 31 bits are available to specify when an interrupt is to occur at some time in the near future. Attempting to specify a larger interval with TVAL results in a negative time delta which means the timer fires immediately upon being programmed, rather than firing at that expected future time. The solution is for Linux to declare that TVAL is a 31 bit register rather than give its true size of 32 bits. This prevents Linux from programming TVAL with a too-large value. Note that, prior to 5.16, this little trick was the standard way to handle TVAL in Linux, so there is nothing new happening here on that front. The softlockup detector hides the issue, because it keeps generating short timer deadlines that are within the scope of the broken timer. Disabling it, it starts using NO_HZ with much longer timer deadlines, which turns into an interrupt flood: 11: 1124855130 949168462 758009394 76417474 104782230 30210281 310890 1734323687 GICv2 29 Level arch_timer And "much longer" isn't that long: it takes less than 43s to underflow TVAL at 50MHz (the frequency of the counter on XGene-1). Some comments on the v1 version of this patch by Marc Zyngier: XGene implements CVAL (a 64bit comparator) in terms of TVAL (a countdown register) instead of the other way around. TVAL being a 32bit register, the width of the counter should equally be 32. However, TVAL is a *signed* value, and keeps counting down in the negative range once the timer fires. It means that any TVAL value with bit 31 set will fire immediately, as it cannot be distinguished from an already expired timer. Reducing the timer range back to a paltry 31 bits papers over the issue. Another problem cannot be fixed though, which is that the timer interrupt *must* be handled within the negative countdown period, or the interrupt will be lost (TVAL will rollover to a positive value, indicative of a new timer deadline). Fixes: 012f18850452 ("clocksource/drivers/arm_arch_timer: Work around broken CVAL implementations") Signed-off-by: Joe Korty Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20221024165422.GA51107@zipoli.concurrent-rt.com Link: https://lore.kernel.org/r/20221121145343.896018-1-maz@kernel.org [maz: revamped the commit message] --- drivers/clocksource/arm_arch_timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a7ff77550e17..933bb960490d 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -806,6 +806,9 @@ static u64 __arch_timer_check_delta(void) /* * XGene-1 implements CVAL in terms of TVAL, meaning * that the maximum timer range is 32bit. Shame on them. + * + * Note that TVAL is signed, thus has only 31 of its + * 32 bits to express magnitude. */ MIDR_ALL_VERSIONS(MIDR_CPU_MODEL(ARM_CPU_IMP_APM, APM_CPU_PART_POTENZA)), @@ -813,8 +816,8 @@ static u64 __arch_timer_check_delta(void) }; if (is_midr_in_range_list(read_cpuid_id(), broken_cval_midrs)) { - pr_warn_once("Broken CNTx_CVAL_EL1, limiting width to 32bits"); - return CLOCKSOURCE_MASK(32); + pr_warn_once("Broken CNTx_CVAL_EL1, using 31 bit TVAL instead.\n"); + return CLOCKSOURCE_MASK(31); } #endif return CLOCKSOURCE_MASK(arch_counter_get_width()); From 502487847743018c93d75b401eac2ea4c4973123 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Sat, 19 Nov 2022 12:51:59 +0800 Subject: [PATCH 511/963] cifs: fix missing unlock in cifs_file_copychunk_range() xfstests generic/013 and generic/476 reported WARNING as follows: WARNING: lock held when returning to user space! 6.1.0-rc5+ #4 Not tainted ------------------------------------------------ fsstress/504233 is leaving the kernel with locks still held! 2 locks held by fsstress/504233: #0: ffff888054c38850 (&sb->s_type->i_mutex_key#21){+.+.}-{3:3}, at: lock_two_nondirectories+0xcf/0xf0 #1: ffff8880b8fec750 (&sb->s_type->i_mutex_key#21/4){+.+.}-{3:3}, at: lock_two_nondirectories+0xb7/0xf0 This will lead to deadlock and hungtask. Fix this by releasing locks when failed to write out on a file range in cifs_file_copychunk_range(). Fixes: 3e3761f1ec7d ("smb3: use filemap_write_and_wait_range instead of filemap_write_and_wait") Cc: stable@vger.kernel.org # 6.0 Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: ChenXiaoSong Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index fe220686bba4..712a43161448 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1281,7 +1281,7 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = filemap_write_and_wait_range(src_inode->i_mapping, off, off + len - 1); if (rc) - goto out; + goto unlock; /* should we flush first and last page first */ truncate_inode_pages(&target_inode->i_data, 0); @@ -1297,6 +1297,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, * that target is updated on the server */ CIFS_I(target_inode)->time = 0; + +unlock: /* although unlocking in the reverse order from locking is not * strictly necessary here it is a little cleaner to be consistent */ From 3405a4beaaa852f3ed2a5eb3b5149932d5c3779b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= Date: Thu, 10 Nov 2022 18:40:56 +0100 Subject: [PATCH 512/963] HID: uclogic: Add HID_QUIRK_HIDINPUT_FORCE quirk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit f7d8e387d9ae ("HID: uclogic: Switch to Digitizer usage for styluses") changed the usage used in UCLogic from "Pen" to "Digitizer". However, the IS_INPUT_APPLICATION() macro evaluates to false for HID_DG_DIGITIZER causing issues with the XP-Pen Star G640 tablet. Add the HID_QUIRK_HIDINPUT_FORCE quirk to bypass the IS_INPUT_APPLICATION() check. Reported-by: Torge Matthies Reported-by: Alexander Zhang Tested-by: Alexander Zhang Signed-off-by: José Expósito Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 0fbc408c2607..7fa6fe04f1b2 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -192,6 +192,7 @@ static int uclogic_probe(struct hid_device *hdev, * than the pen, so use QUIRK_MULTI_INPUT for all tablets. */ hdev->quirks |= HID_QUIRK_MULTI_INPUT; + hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); From 836e49e103dfeeff670c934b7d563cbd982fce87 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Mon, 14 Nov 2022 08:47:19 -0500 Subject: [PATCH 513/963] bpf: Do not copy spin lock field from user in bpf_selem_alloc bpf_selem_alloc function is used by inode_storage, sk_storage and task_storage maps to set map value, for these map types, there may be a spin lock in the map value, so if we use memcpy to copy the whole map value from user, the spin lock field may be initialized incorrectly. Since the spin lock field is zeroed by kzalloc, call copy_map_value instead of memcpy to skip copying the spin lock field to fix it. Fixes: 6ac99e8f23d4 ("bpf: Introduce bpf sk local storage") Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20221114134720.1057939-2-xukuohai@huawei.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/bpf_local_storage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 802fc15b0d73..f27fa5ba7d72 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -74,7 +74,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, gfp_flags | __GFP_NOWARN); if (selem) { if (value) - memcpy(SDATA(selem)->data, value, smap->map.value_size); + copy_map_value(&smap->map, SDATA(selem)->data, value); return selem; } From d59d3b8a3ed1d9006c602d991d52b351ed851180 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Mon, 14 Nov 2022 08:47:20 -0500 Subject: [PATCH 514/963] bpf: Set and check spin lock value in sk_storage_map_test Update sk_storage_map_test to make sure kernel does not copy user non-zero value spin lock to kernel, and does not copy kernel spin lock value to user. If user spin lock value is copied to kernel, this test case will make kernel spin on the copied lock, resulting in rcu stall and softlockup. Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20221114134720.1057939-3-xukuohai@huawei.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/map_tests/sk_storage_map.c | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index 099eb4dfd4f7..18405c3b7cee 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -458,7 +458,7 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; + } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; @@ -483,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -526,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); From 2b506f20af2b9dcfb3cb2d19e4c804eb20565841 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Nov 2022 11:02:27 +0100 Subject: [PATCH 515/963] selftests/bpf: Filter out default_idle from kprobe_multi bench Alexei hit following rcu warning when running prog_test -j. [ 128.049567] WARNING: suspicious RCU usage [ 128.049569] 6.1.0-rc2 #912 Tainted: G O ... [ 128.050944] kprobe_multi_link_handler+0x6c/0x1d0 [ 128.050947] ? kprobe_multi_link_handler+0x42/0x1d0 [ 128.050950] ? __cpuidle_text_start+0x8/0x8 [ 128.050952] ? __cpuidle_text_start+0x8/0x8 [ 128.050958] fprobe_handler.part.1+0xac/0x150 [ 128.050964] 0xffffffffa02130c8 [ 128.050991] ? default_idle+0x5/0x20 [ 128.050998] default_idle+0x5/0x20 It's caused by bench test attaching kprobe_multi link to default_idle function, which is not executed in rcu safe context so the kprobe handler on top of it will trigger the rcu warning. Filtering out default_idle function from the bench test. Reported-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221116100228.2064612-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index d457a55ff408..b43928967515 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -358,10 +358,12 @@ static int get_syms(char ***symsp, size_t *cntp) * We attach to almost all kernel functions and some of them * will cause 'suspicious RCU usage' when fprobe is attached * to them. Filter out the current culprits - arch_cpu_idle - * and rcu_* functions. + * default_idle and rcu_* functions. */ if (!strcmp(name, "arch_cpu_idle")) continue; + if (!strcmp(name, "default_idle")) + continue; if (!strncmp(name, "rcu_", 4)) continue; if (!strcmp(name, "bpf_dispatcher_xdp_func")) From 8be602dadb2febb5e4cb367bff1162205bcf9905 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Nov 2022 11:02:28 +0100 Subject: [PATCH 516/963] selftests/bpf: Make test_bench_attach serial Alexei hit another rcu warnings because of this test. Making test_bench_attach serial so it does not disrupts other tests during parallel tests run. While this change is not the fix, it should be less likely to hit it with this test being executed serially. Reported-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20221116100228.2064612-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index b43928967515..a4b4133d39e9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -402,7 +402,7 @@ static int get_syms(char ***symsp, size_t *cntp) return err; } -static void test_bench_attach(void) +void serial_test_kprobe_multi_bench_attach(void) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct kprobe_multi_empty *skel = NULL; @@ -470,6 +470,4 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); - if (test__start_subtest("bench_attach")) - test_bench_attach(); } From 865e0674ca4047969933beea98f57371e611deca Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Tue, 10 May 2022 13:47:07 -0700 Subject: [PATCH 517/963] ANDROID: firmware_loader: Add support for customer firmware paths Currently firmware_class.patch commandline can take a single path for loading firmwares on custom paths. SoC vendors and oems can have firmwares in multiple file system paths. So add support for paassing multiple paths through command line for firmware loader. Add a getter function to read the class path. For example - firmware_class.path="/vendor,/vendor/firmware_mnt, /oem/firmware". firmware_class.path can take upto 10 file system paths with ',' separation. Bug: 259298396 Change-Id: I31d1470d7dd0255c7aefd856f3c129bdb4b7f2e8 Signed-off-by: Prasad Sodagudi Signed-off-by: Vamsi Krishna Lanka --- drivers/base/firmware_loader/main.c | 74 +++++++++++++++++++++++++++-- 1 file changed, 69 insertions(+), 5 deletions(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 7c3590fd97c2..6966a1854832 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -467,21 +467,85 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, #endif /* CONFIG_FW_LOADER_COMPRESS_XZ */ /* direct firmware loading support */ -static char fw_path_para[256]; +#define CUSTOM_FW_PATH_COUNT 10 +#define PATH_SIZE 255 +static char fw_path_para[CUSTOM_FW_PATH_COUNT][PATH_SIZE]; static const char * const fw_path[] = { - fw_path_para, + fw_path_para[0], + fw_path_para[1], + fw_path_para[2], + fw_path_para[3], + fw_path_para[4], + fw_path_para[5], + fw_path_para[6], + fw_path_para[7], + fw_path_para[8], + fw_path_para[9], "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, "/lib/firmware" }; +static char strpath[PATH_SIZE * CUSTOM_FW_PATH_COUNT]; +static int firmware_param_path_set(const char *val, const struct kernel_param *kp) +{ + int i; + char *path, *end; + + strscpy(strpath, val, sizeof(strpath)); + /* Remove leading and trailing spaces from path */ + path = strim(strpath); + for (i = 0; path && i < CUSTOM_FW_PATH_COUNT; i++) { + end = strchr(path, ','); + + /* Skip continuous token case, for example ',,,' */ + if (end == path) { + i--; + path = ++end; + continue; + } + + if (end != NULL) + *end = '\0'; + else { + /* end of the string reached and no other tockens ',' */ + strscpy(fw_path_para[i], path, PATH_SIZE); + break; + } + + strscpy(fw_path_para[i], path, PATH_SIZE); + path = ++end; + } + + return 0; +} + +static int firmware_param_path_get(char *buffer, const struct kernel_param *kp) +{ + int count = 0, i; + + for (i = 0; i < CUSTOM_FW_PATH_COUNT; i++) { + if (strlen(fw_path_para[i]) != 0) + count += scnprintf(buffer + count, PATH_SIZE, "%s%s", fw_path_para[i], ","); + } + + buffer[count - 1] = '\0'; + + return count - 1; +} /* - * Typical usage is that passing 'firmware_class.path=$CUSTOMIZED_PATH' + * Typical usage is that passing 'firmware_class.path=/vendor,/firwmare_mnt' * from kernel command line because firmware_class is generally built in - * kernel instead of module. + * kernel instead of module. ',' is used as delimiter for setting 10 + * custom paths for firmware loader. */ -module_param_string(path, fw_path_para, sizeof(fw_path_para), 0644); + +static const struct kernel_param_ops firmware_param_ops = { + .set = firmware_param_path_set, + .get = firmware_param_path_get, +}; +module_param_cb(path, &firmware_param_ops, NULL, 0644); MODULE_PARM_DESC(path, "customized firmware image search path with a higher priority than default path"); static int From a3cab1d2132474969871b5d7f915c5c0167b48b0 Mon Sep 17 00:00:00 2001 From: Sebastian Falbesoner Date: Mon, 21 Nov 2022 11:57:21 +0100 Subject: [PATCH 518/963] mmc: sdhci-esdhc-imx: correct CQHCI exit halt state check With the current logic the "failed to exit halt state" error would be shown even if any other bit than CQHCI_HALT was set in the CQHCI_CTL register, since the right hand side is always true. Fix this by using the correct operator (bit-wise instead of logical AND) to only check for the halt bit flag, which was obviously intended here. Fixes: 85236d2be844 ("mmc: sdhci-esdhc-imx: clear the HALT bit when enable CQE") Signed-off-by: Sebastian Falbesoner Acked-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221121105721.1903878-1-sebastian.falbesoner@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. */ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); From ec61b41918587be530398b0d1c9a0d16619397e5 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Wed, 16 Nov 2022 07:14:28 +0000 Subject: [PATCH 519/963] HID: core: fix shift-out-of-bounds in hid_report_raw_event Syzbot reported shift-out-of-bounds in hid_report_raw_event. microsoft 0003:045E:07DA.0001: hid_field_extract() called with n (128) > 32! (swapper/0) ====================================================================== UBSAN: shift-out-of-bounds in drivers/hid/hid-core.c:1323:20 shift exponent 127 is too large for 32-bit type 'int' CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.1.0-rc4-syzkaller-00159-g4bbf3422df78 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106 ubsan_epilogue lib/ubsan.c:151 [inline] __ubsan_handle_shift_out_of_bounds+0x3a6/0x420 lib/ubsan.c:322 snto32 drivers/hid/hid-core.c:1323 [inline] hid_input_fetch_field drivers/hid/hid-core.c:1572 [inline] hid_process_report drivers/hid/hid-core.c:1665 [inline] hid_report_raw_event+0xd56/0x18b0 drivers/hid/hid-core.c:1998 hid_input_report+0x408/0x4f0 drivers/hid/hid-core.c:2066 hid_irq_in+0x459/0x690 drivers/hid/usbhid/hid-core.c:284 __usb_hcd_giveback_urb+0x369/0x530 drivers/usb/core/hcd.c:1671 dummy_timer+0x86b/0x3110 drivers/usb/gadget/udc/dummy_hcd.c:1988 call_timer_fn+0xf5/0x210 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers+0x76a/0x980 kernel/time/timer.c:1790 run_timer_softirq+0x63/0xf0 kernel/time/timer.c:1803 __do_softirq+0x277/0x75b kernel/softirq.c:571 __irq_exit_rcu+0xec/0x170 kernel/softirq.c:650 irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1107 ====================================================================== If the size of the integer (unsigned n) is bigger than 32 in snto32(), shift exponent will be too large for 32-bit type 'int', resulting in a shift-out-of-bounds bug. Fix this by adding a check on the size of the integer (unsigned n) in snto32(). To add support for n greater than 32 bits, set n to 32, if n is greater than 32. Reported-by: syzbot+8b1641d2f14732407e23@syzkaller.appspotmail.com Fixes: dde5845a529f ("[PATCH] Generic HID layer - code split") Signed-off-by: ZhangPeng Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9c1d31f63f85..bd47628da6be 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1315,6 +1315,9 @@ static s32 snto32(__u32 value, unsigned n) if (!value || !n) return 0; + if (n > 32) + n = 32; + switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); From 6bf834cb5329a892e25b7b1bfe4287b7c7f8b7cd Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Fri, 11 Nov 2022 16:37:02 -0800 Subject: [PATCH 520/963] ANDROID: kleaf: Add build rules for allmodconfig. These corresponds to the build.config.allmodconfig.* build configs. Unlike build.sh, the Kleaf variant deliberately does not copy any outputs to the distribution directory. There's not even a dist target. This is because allmodconfig is a build test only. Test: bazel build --allow_undeclared_mdoules //common:allmodconfig_{aarch64,arm,x86_64} Bug: 258259749 Change-Id: Iefe8f69149f6391938696e90243939d42d4fda84 Signed-off-by: Yifan Hong --- BUILD.bazel | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/BUILD.bazel b/BUILD.bazel index 8967b61d5d91..68b160637797 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -324,6 +324,42 @@ copy_to_dist_dir( flat = True, ) +# allmodconfig build tests. +# These are build tests only, so: +# - outs are intentionally set to empty to not copy anything to DIST_DIR +# - --allow-undeclared-modules must be used so modules are not declared or copied. +# - No dist target because these are build tests. We don't care about the artifacts. + +# tools/bazel build --allow_undeclared_modules //common:allmodconfig_aarch64 +kernel_build( + name = "allmodconfig_aarch64", + # Hack to actually check the build. + # Otherwise, Bazel thinks that there are no output files, and skip building. + outs = [".config"], + build_config = "build.config.allmodconfig.aarch64", + visibility = ["//visibility:private"], +) + +# tools/bazel build --allow_undeclared_modules //common:allmodconfig_x86_64 +kernel_build( + name = "allmodconfig_x86_64", + # Hack to actually check the build. + # Otherwise, Bazel thinks that there are no output files, and skip building. + outs = [".config"], + build_config = "build.config.allmodconfig.x86_64", + visibility = ["//visibility:private"], +) + +# tools/bazel build --allow_undeclared_modules //common:allmodconfig_arm +kernel_build( + name = "allmodconfig_arm", + # Hack to actually check the build. + # Otherwise, Bazel thinks that there are no output files, and skip building. + outs = [".config"], + build_config = "build.config.allmodconfig.arm", + visibility = ["//visibility:private"], +) + # DDK Headers # All headers. These are the public targets for DDK modules to use. alias( From 3ca682389435681e2d660428be2187fbae0effbb Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Nov 2022 15:50:27 -0500 Subject: [PATCH 521/963] drm/amd/display: Align dcn314_smu logging with other DCNs [Why] Assert on non-OK response from SMU is unnecessary. It was replaced with respective log message on other asics in the past with commit: "drm/amd/display: Removing assert statements for Linux" [How] Remove assert and add dbg logging as on other DCNs. Signed-off-by: Roman Li Reviewed-by: Saaem Rizvi Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c index ef0795b14a1f..2db595672a46 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c @@ -123,9 +123,10 @@ static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, uint32_t result; result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000); - ASSERT(result == VBIOSSMC_Result_OK); - smu_print("SMU response after wait: %d\n", result); + if (result != VBIOSSMC_Result_OK) + smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", + result); if (result == VBIOSSMC_Status_BUSY) return -1; @@ -216,6 +217,12 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request VBIOSSMC_MSG_SetHardMinDcfclkByFreq, khz_to_mhz_ceil(requested_dcfclk_khz)); +#ifdef DBG + smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", + actual_dcfclk_set_mhz, + actual_dcfclk_set_mhz * 1000); +#endif + return actual_dcfclk_set_mhz * 1000; } From ba891436c2d2b2a6d6c1bc3733bab3b72f07e87f Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:17:52 -0500 Subject: [PATCH 522/963] drm/amdgpu/mst: Stop ignoring error codes and deadlocking It appears that amdgpu makes the mistake of completely ignoring the return values from the DP MST helpers, and instead just returns a simple true/false. In this case, it seems to have come back to bite us because as a result of simply returning false from compute_mst_dsc_configs_for_state(), amdgpu had no way of telling when a deadlock happened from these helpers. This could definitely result in some kernel splats. V2: * Address Wayne's comments (fix another bunch of spots where we weren't passing down return codes) Signed-off-by: Lyude Paul Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share") Cc: Harry Wentland Cc: # v5.6+ Reviewed-by: Wayne Lin Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 18 +- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 235 ++++++++++-------- .../display/amdgpu_dm/amdgpu_dm_mst_types.h | 12 +- 3 files changed, 147 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e1ecca72430..636e6d640b4e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6475,7 +6475,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, struct drm_connector_state *new_con_state; struct amdgpu_dm_connector *aconnector; struct dm_connector_state *dm_conn_state; - int i, j; + int i, j, ret; int vcpi, pbn_div, pbn, slot_num = 0; for_each_new_connector_in_state(state, connector, new_con_state, i) { @@ -6522,8 +6522,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, dm_conn_state->pbn = pbn; dm_conn_state->vcpi_slots = slot_num; - drm_dp_mst_atomic_enable_dsc(state, aconnector->port, dm_conn_state->pbn, - false); + ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->port, + dm_conn_state->pbn, false); + if (ret < 0) + return ret; + continue; } @@ -9537,10 +9540,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, #if defined(CONFIG_DRM_AMD_DC_DCN) if (dc_resource_is_dsc_encoding_supported(dc)) { - if (!pre_validate_dsc(state, &dm_state, vars)) { - ret = -EINVAL; + ret = pre_validate_dsc(state, &dm_state, vars); + if (ret != 0) goto fail; - } } #endif @@ -9635,9 +9637,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, } #if defined(CONFIG_DRM_AMD_DC_DCN) - if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) { + ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars); + if (ret) { DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto fail; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6ff96b4bdda5..bba2e8aaa2c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -703,13 +703,13 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn) return dsc_config.bits_per_pixel; } -static bool increase_dsc_bpp(struct drm_atomic_state *state, - struct drm_dp_mst_topology_state *mst_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int increase_dsc_bpp(struct drm_atomic_state *state, + struct drm_dp_mst_topology_state *mst_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool bpp_increased[MAX_PIPES]; @@ -719,6 +719,7 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, int remaining_to_increase = 0; int link_timeslots_used; int fair_pbn_alloc; + int ret = 0; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { @@ -757,52 +758,60 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state, if (initial_slack[next_index] > fair_pbn_alloc) { vars[next_index].pbn += fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn); } else { vars[next_index].pbn -= fair_pbn_alloc; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } else { vars[next_index].pbn += initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; + + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16; } else { vars[next_index].pbn -= initial_slack[next_index]; - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } } bpp_increased[next_index] = true; remaining_to_increase--; } - return true; + return 0; } -static bool try_disable_dsc(struct drm_atomic_state *state, - struct dc_link *dc_link, - struct dsc_mst_fairness_params *params, - struct dsc_mst_fairness_vars *vars, - int count, - int k) +static int try_disable_dsc(struct drm_atomic_state *state, + struct dc_link *dc_link, + struct dsc_mst_fairness_params *params, + struct dsc_mst_fairness_vars *vars, + int count, + int k) { int i; bool tried[MAX_PIPES]; @@ -810,6 +819,7 @@ static bool try_disable_dsc(struct drm_atomic_state *state, int max_kbps_increase; int next_index; int remaining_to_try = 0; + int ret; for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -840,49 +850,52 @@ static bool try_disable_dsc(struct drm_atomic_state *state, break; vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; - if (!drm_dp_mst_atomic_check(state)) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0) { vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps); - if (drm_dp_atomic_find_time_slots(state, - params[next_index].port->mgr, - params[next_index].port, - vars[next_index].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, + params[next_index].port->mgr, + params[next_index].port, + vars[next_index].pbn); + if (ret < 0) + return ret; } tried[next_index] = true; remaining_to_try--; } - return true; + return 0; } -static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dc_link *dc_link, - struct dsc_mst_fairness_vars *vars, - struct drm_dp_mst_topology_mgr *mgr, - int *link_vars_start_index) +static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dc_link *dc_link, + struct dsc_mst_fairness_vars *vars, + struct drm_dp_mst_topology_mgr *mgr, + int *link_vars_start_index) { struct dc_stream_state *stream; struct dsc_mst_fairness_params params[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr); int count = 0; - int i, k; + int i, k, ret; bool debugfs_overwrite = false; memset(params, 0, sizeof(params)); if (IS_ERR(mst_state)) - return false; + return PTR_ERR(mst_state); mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link); #if defined(CONFIG_DRM_AMD_DC_DCN) @@ -933,7 +946,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (count == 0) { ASSERT(0); - return true; + return 0; } /* k is start index of vars for current phy link used by mst hub */ @@ -947,13 +960,17 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, - vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, + vars[i + k].pbn); + if (ret < 0) + return ret; } - if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) { + ret = drm_dp_mst_atomic_check(state); + if (ret == 0 && !debugfs_overwrite) { set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; + } else if (ret != -ENOSPC) { + return ret; } /* Try max compression */ @@ -962,31 +979,36 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } else { vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; - if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, - params[i].port, vars[i + k].pbn) < 0) - return false; + ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, + params[i].port, vars[i + k].pbn); + if (ret < 0) + return ret; } } - if (drm_dp_mst_atomic_check(state)) - return false; + ret = drm_dp_mst_atomic_check(state); + if (ret != 0) + return ret; /* Optimize degree of compression */ - if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k)) - return false; + ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; - if (!try_disable_dsc(state, dc_link, params, vars, count, k)) - return false; + ret = try_disable_dsc(state, dc_link, params, vars, count, k); + if (ret < 0) + return ret; set_dsc_configs_from_fairness_vars(params, vars, count, k); - return true; + return 0; } static bool is_dsc_need_re_compute( @@ -1087,15 +1109,16 @@ static bool is_dsc_need_re_compute( return is_dsc_need_re_compute; } -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; int link_vars_start_index = 0; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1118,17 +1141,19 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { + + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, + &aconnector->mst_mgr, + &link_vars_start_index); + if (ret != 0) { mutex_unlock(&aconnector->mst_mgr.lock); - return false; + return ret; } mutex_unlock(&aconnector->mst_mgr.lock); @@ -1143,22 +1168,22 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (stream->timing.flags.DSC == 1) if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK) - return false; + return -EINVAL; } - return true; + return ret; } -static bool - pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars) +static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars) { int i, j; struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; int link_vars_start_index = 0; + int ret; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; @@ -1184,11 +1209,12 @@ static bool continue; mutex_lock(&aconnector->mst_mgr.lock); - if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, - &link_vars_start_index)) { + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, + &aconnector->mst_mgr, + &link_vars_start_index); + if (ret != 0) { mutex_unlock(&aconnector->mst_mgr.lock); - return false; + return ret; } mutex_unlock(&aconnector->mst_mgr.lock); @@ -1198,7 +1224,7 @@ static bool } } - return true; + return ret; } static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state, @@ -1253,9 +1279,9 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state) return ret; } -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars) +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars) { int i; struct dm_atomic_state *dm_state; @@ -1264,11 +1290,12 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (!is_dsc_precompute_needed(state)) { DRM_INFO_ONCE("DSC precompute is not needed.\n"); - return true; + return 0; } - if (dm_atomic_get_state(state, dm_state_ptr)) { + ret = dm_atomic_get_state(state, dm_state_ptr); + if (ret != 0) { DRM_INFO_ONCE("dm_atomic_get_state() failed\n"); - return false; + return ret; } dm_state = *dm_state_ptr; @@ -1280,7 +1307,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state, local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL); if (!local_dc_state) - return false; + return -ENOMEM; for (i = 0; i < local_dc_state->stream_count; i++) { struct dc_stream_state *stream = dm_state->context->streams[i]; @@ -1316,9 +1343,9 @@ bool pre_validate_dsc(struct drm_atomic_state *state, if (ret != 0) goto clean_exit; - if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) { + ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars); + if (ret != 0) { DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n"); - ret = -EINVAL; goto clean_exit; } @@ -1349,7 +1376,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state, kfree(local_dc_state); - return (ret == 0); + return ret; } static unsigned int kbps_from_pbn(unsigned int pbn) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index b92a7c5671aa..97fd70df531b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars { struct amdgpu_dm_connector *aconnector; }; -bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, - struct dc_state *dc_state, - struct dsc_mst_fairness_vars *vars); +int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, + struct dc_state *dc_state, + struct dsc_mst_fairness_vars *vars); bool needs_dsc_aux_workaround(struct dc_link *link); -bool pre_validate_dsc(struct drm_atomic_state *state, - struct dm_atomic_state **dm_state_ptr, - struct dsc_mst_fairness_vars *vars); +int pre_validate_dsc(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state_ptr, + struct dsc_mst_fairness_vars *vars); enum dc_status dm_dp_mst_is_port_support_mode( struct amdgpu_dm_connector *aconnector, From 2f3a1273862cb82cca227630cc7f04ce0c94b6bb Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:17:53 -0500 Subject: [PATCH 523/963] drm/display/dp_mst: Fix drm_dp_mst_add_affected_dsc_crtcs() return code Looks like that we're accidentally dropping a pretty important return code here. For some reason, we just return -EINVAL if we fail to get the MST topology state. This is wrong: error codes are important and should never be squashed without being handled, which here seems to have the potential to cause a deadlock. Signed-off-by: Lyude Paul Reviewed-by: Wayne Lin Fixes: 8ec046716ca8 ("drm/dp_mst: Add helper to trigger modeset on affected DSC MST CRTCs") Cc: # v5.6+ Signed-off-by: Alex Deucher --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index ecd22c038c8c..51a46689cda7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -5186,7 +5186,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm mst_state = drm_atomic_get_mst_topology_state(state, mgr); if (IS_ERR(mst_state)) - return -EINVAL; + return PTR_ERR(mst_state); list_for_each_entry(pos, &mst_state->payloads, next) { From dfbc00410c48a9896d4a65600be7137202517780 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:17:54 -0500 Subject: [PATCH 524/963] drm/amdgpu/dm/mst: Use the correct topology mgr pointer in amdgpu_dm_connector This bug hurt me. Basically, it appears that we've been grabbing the entirely wrong mutex in the MST DSC computation code for amdgpu! While we've been grabbing: amdgpu_dm_connector->mst_mgr That's zero-initialized memory, because the only connectors we'll ever actually be doing DSC computations for are MST ports. Which have mst_mgr zero-initialized, and instead have the correct topology mgr pointer located at: amdgpu_dm_connector->mst_port->mgr; I'm a bit impressed that until now, this code has managed not to crash anyone's systems! It does seem to cause a warning in LOCKDEP though: [ 66.637670] DEBUG_LOCKS_WARN_ON(lock->magic != lock) This was causing the problems that appeared to have been introduced by: commit 4d07b0bc4034 ("drm/display/dp_mst: Move all payload info into the atomic state") This wasn't actually where they came from though. Presumably, before the only thing we were doing with the topology mgr pointer was attempting to grab mst_mgr->lock. Since the above commit however, we grab much more information from mst_mgr including the atomic MST state and respective modesetting locks. This patch also implies that up until now, it's quite likely we could be susceptible to race conditions when going through the MST topology state for DSC computations since we technically will not have grabbed any lock when going through it. So, let's fix this by adjusting all the respective code paths to look at the right pointer and skip things that aren't actual MST connectors from a topology. Gitlab issue: https://gitlab.freedesktop.org/drm/amd/-/issues/2171 Signed-off-by: Lyude Paul Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share") Cc: # v5.6+ Reviewed-by: Wayne Lin Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index bba2e8aaa2c2..5196c9a0e432 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1117,6 +1117,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; int ret = 0; @@ -1131,7 +1132,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1146,16 +1147,13 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - - ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, &link_vars_start_index); - if (ret != 0) { - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); + if (ret != 0) return ret; - } - mutex_unlock(&aconnector->mst_mgr.lock); for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1182,6 +1180,7 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, struct dc_stream_state *stream; bool computed_streams[MAX_PIPES]; struct amdgpu_dm_connector *aconnector; + struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; int ret; @@ -1196,7 +1195,7 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; - if (!aconnector || !aconnector->dc_sink) + if (!aconnector || !aconnector->dc_sink || !aconnector->port) continue; if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported) @@ -1208,15 +1207,13 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, if (!is_dsc_need_re_compute(state, dc_state, stream->link)) continue; - mutex_lock(&aconnector->mst_mgr.lock); - ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, - &aconnector->mst_mgr, + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); + ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, &link_vars_start_index); - if (ret != 0) { - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); + if (ret != 0) return ret; - } - mutex_unlock(&aconnector->mst_mgr.lock); for (j = 0; j < dc_state->stream_count; j++) { if (dc_state->streams[j]->link == stream->link) @@ -1419,6 +1416,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0; unsigned int max_compressed_bw_in_kbps = 0; struct dc_dsc_bw_range bw_range = {0}; + struct drm_dp_mst_topology_mgr *mst_mgr; /* * check if the mode could be supported if DSC pass-through is supported @@ -1427,7 +1425,8 @@ enum dc_status dm_dp_mst_is_port_support_mode( */ if (is_dsc_common_config_possible(stream, &bw_range) && aconnector->port->passthrough_aux) { - mutex_lock(&aconnector->mst_mgr.lock); + mst_mgr = aconnector->port->mgr; + mutex_lock(&mst_mgr->lock); cur_link_settings = stream->link->verified_link_cap; @@ -1440,7 +1439,7 @@ enum dc_status dm_dp_mst_is_port_support_mode( end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps, down_link_bw_in_kbps); - mutex_unlock(&aconnector->mst_mgr.lock); + mutex_unlock(&mst_mgr->lock); /* * use the maximum dsc compression bandwidth as the required From d60b82aa4d67b2e6cf0364947a008bb7255ca4da Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:17:55 -0500 Subject: [PATCH 525/963] drm/amdgpu/dm/dp_mst: Don't grab mst_mgr->lock when computing DSC state Now that we've fixed the issue with using the incorrect topology manager, we're actually grabbing the topology manager's lock - and consequently deadlocking. Luckily for us though, there's actually nothing in AMD's DSC state computation code that really should need this lock. The one exception is the mutex_lock() in dm_dp_mst_is_port_support_mode(), however we grab no locks beneath &mgr->lock there so that should be fine to leave be. Gitlab issue: https://gitlab.freedesktop.org/drm/amd/-/issues/2171 Signed-off-by: Lyude Paul Fixes: 8c20a1ed9b4f ("drm/amd/display: MST DSC compute fair share") Cc: # v5.6+ Reviewed-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5196c9a0e432..59648f5ffb59 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1148,10 +1148,8 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; mst_mgr = aconnector->port->mgr; - mutex_lock(&mst_mgr->lock); ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, &link_vars_start_index); - mutex_unlock(&mst_mgr->lock); if (ret != 0) return ret; @@ -1208,10 +1206,8 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, continue; mst_mgr = aconnector->port->mgr; - mutex_lock(&mst_mgr->lock); ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr, &link_vars_start_index); - mutex_unlock(&mst_mgr->lock); if (ret != 0) return ret; From 85ef1679a190a9740f6b72217cb139a0d9c58706 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Fri, 18 Nov 2022 14:54:05 -0500 Subject: [PATCH 526/963] drm/amdgpu/dm/mst: Fix uninitialized var in pre_compute_mst_dsc_configs_for_state() Coverity noticed this one, so let's fix it. Fixes: ba891436c2d2b2 ("drm/amdgpu/mst: Stop ignoring error codes and deadlocking") Signed-off-by: Lyude Paul Signed-off-by: Alex Deucher Reviewed-by: Harry Wentland Cc: stable@vger.kernel.org # v5.6+ --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 59648f5ffb59..6483ba266893 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -1180,7 +1180,7 @@ static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state, struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; int link_vars_start_index = 0; - int ret; + int ret = 0; for (i = 0; i < dc_state->stream_count; i++) computed_streams[i] = false; From b39df63b16b64a3af42695acb9bc567aad144776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Nov 2022 12:14:44 +0100 Subject: [PATCH 527/963] drm/amdgpu: always register an MMU notifier for userptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since switching to HMM we always need that because we no longer grab references to the pages. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Felix Kuehling CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8ef31d687ef3..111484ceb47d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -413,11 +413,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; - if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) { - r = amdgpu_mn_register(bo, args->addr); - if (r) - goto release_object; - } + r = amdgpu_mn_register(bo, args->addr); + if (r) + goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); From 4458da0bb09d4435956b4377685e8836935e9b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Nov 2022 12:31:41 +0100 Subject: [PATCH 528/963] drm/amdgpu: fix userptr HMM range handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The basic problem here is that it's not allowed to page fault while holding the reservation lock. So it can happen that multiple processes try to validate an userptr at the same time. Work around that by putting the HMM range object into the mutex protected bo list for now. v2: make sure range is set to NULL in case of an error Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling CC: stable@vger.kernel.org Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 53 ++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 14 +++-- 7 files changed, 46 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 6d1ff7b9258d..1f76e27f1a35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -986,6 +986,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; struct ttm_operation_ctx ctx = { true, false }; + struct hmm_range *range; int ret = 0; mutex_lock(&process_info->lock); @@ -1015,7 +1016,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, return 0; } - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); goto unregister_out; @@ -1033,7 +1034,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -2370,6 +2371,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, validate_list.head) { + struct hmm_range *range; + invalid = atomic_read(&mem->invalid); if (!invalid) /* BO hasn't been invalidated since the last @@ -2380,7 +2383,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; /* Get updated user pages */ - ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (ret) { pr_debug("Failed %d to get user pages\n", ret); @@ -2399,7 +2403,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * FIXME: Cannot ignore the return code, must hold * notifier_lock */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); } /* Mark the BO as valid unless it was invalidated diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 2168163aad2d..252a876b0725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -209,6 +209,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&e->tv.head, &bucket[priority]); e->user_pages = NULL; + e->range = NULL; } /* Connect the sorted buckets in the output list. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 9caea1688fc3..e4d78491bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -26,6 +26,8 @@ #include #include +struct hmm_range; + struct amdgpu_device; struct amdgpu_bo; struct amdgpu_bo_va; @@ -36,6 +38,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; + struct hmm_range *range; bool user_invalidated; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d038b258cc92..365e3fb6a9e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -913,7 +913,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto out_free_user_pages; } - r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages); + r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range); if (r) { kvfree(e->user_pages); e->user_pages = NULL; @@ -991,9 +991,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (!e->user_pages) continue; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); kvfree(e->user_pages); e->user_pages = NULL; + e->range = NULL; } mutex_unlock(&p->bo_list->bo_list_mutex); return r; @@ -1273,7 +1274,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + e->range = NULL; } if (r) { r = -EAGAIN; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 111484ceb47d..91571b1324f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -378,6 +378,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_userptr *args = data; struct drm_gem_object *gobj; + struct hmm_range *range; struct amdgpu_bo *bo; uint32_t handle; int r; @@ -418,7 +419,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto release_object; if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { - r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); + r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, + &range); if (r) goto release_object; @@ -441,7 +443,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, user_pages_done: if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); release_object: drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 57277b1cf183..b64938ed8cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -643,9 +643,6 @@ struct amdgpu_ttm_tt { struct task_struct *usertask; uint32_t userflags; bool bound; -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - struct hmm_range *range; -#endif }; #define ttm_to_amdgpu_ttm_tt(ptr) container_of(ptr, struct amdgpu_ttm_tt, ttm) @@ -658,7 +655,8 @@ struct amdgpu_ttm_tt { * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range) { struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); @@ -668,16 +666,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) bool readonly; int r = 0; + /* Make sure get_user_pages_done() can cleanup gracefully */ + *range = NULL; + mm = bo->notifier.mm; if (unlikely(!mm)) { DRM_DEBUG_DRIVER("BO is not registered?\n"); return -EFAULT; } - /* Another get_user_pages is running at the same time?? */ - if (WARN_ON(gtt->range)) - return -EFAULT; - if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; @@ -695,7 +692,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) readonly = amdgpu_ttm_tt_is_readonly(ttm); r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, - ttm->num_pages, >t->range, readonly, + ttm->num_pages, range, readonly, true, NULL); out_unlock: mmap_read_unlock(mm); @@ -713,30 +710,24 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) * * Returns: true if pages are still valid */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm); - bool r = false; - if (!gtt || !gtt->userptr) + if (!gtt || !gtt->userptr || !range) return false; DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n", gtt->userptr, ttm->num_pages); - WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns, - "No user pages to check\n"); + WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); - if (gtt->range) { - /* - * FIXME: Must always hold notifier_lock for this, and must - * not ignore the return code. - */ - r = amdgpu_hmm_range_get_pages_done(gtt->range); - gtt->range = NULL; - } - - return !r; + /* + * FIXME: Must always hold notifier_lock for this, and must + * not ignore the return code. + */ + return !amdgpu_hmm_range_get_pages_done(range); } #endif @@ -813,20 +804,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, /* unmap the pages mapped to the device */ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); - -#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) - if (gtt->range) { - unsigned long i; - - for (i = 0; i < ttm->num_pages; i++) { - if (ttm->pages[i] != - hmm_pfn_to_page(gtt->range->hmm_pfns[i])) - break; - } - - WARN((i == ttm->num_pages), "Missing get_user_page_done\n"); - } -#endif } static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..a37207011a69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,6 +39,8 @@ #define AMDGPU_POISON 0xd0bed0be +struct hmm_range; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; @@ -149,15 +151,19 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) -int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages, + struct hmm_range **range); +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range); #else static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, - struct page **pages) + struct page **pages, + struct hmm_range **range) { return -EPERM; } -static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) +static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm, + struct hmm_range *range) { return false; } From b9ab82da8804ec22c7e91ffd9d56c7a3abff0c8e Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Wed, 16 Nov 2022 10:46:08 -0600 Subject: [PATCH 529/963] drm/amdgpu: Enable Aldebaran devices to report CU Occupancy Allow user to know number of compute units (CU) that are in use at any given moment. Enable access to the method kgd_gfx_v9_get_cu_occupancy that computes CU occupancy. Signed-off-by: Ramesh Errabolu Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index c8935d718207..4485bb29bec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings }; From 91abf28a636291135ea5cab9af40f017cff6afce Mon Sep 17 00:00:00 2001 From: Jack Xiao Date: Wed, 16 Nov 2022 16:44:21 +0800 Subject: [PATCH 530/963] drm/amd/amdgpu: reserve vm invalidation engine for firmware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If mes enabled, reserve VM invalidation engine 5 for firmware. Signed-off-by: Jack Xiao Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b06cb0bb166c..28612e56d0d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned i; unsigned vmhub, inv_eng; + if (adev->enable_mes) { + /* reserve engine 5 for firmware */ + for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++) + vm_inv_engs[vmhub] &= ~(1 << 5); + } + for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->funcs->vmhub; From 472faf72b33d80aa8e7a99c9410c1a23d3bf0cd8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Nov 2022 17:37:49 -0800 Subject: [PATCH 531/963] device-dax: Fix duplicate 'hmem' device registration So called "soft-reserved" memory is an EFI conventional memory range with the EFI_MEMORY_SP attribute set. That attribute indicates that the memory is not part of the platform general purpose memory pool and may want some consideration from the system administrator about whether to keep that memory set aside for dedicated access through device-dax (map a device file), or assigned to the page allocator as another general purpose memory node target. Absent an ACPI HMAT table the default device-dax registration creates coarse grained devices that are delineated by EFI Memory Map entries. With the HMAT the devices are delineated by the finer grained ranges associated with the proximity domain of the memory target. I.e. the HMAT describes the properties of performance differentiated memory and each unique performance description results in a unique target proximity domain where each memory proximity domain has an associated SRAT entry that delineates the address range. The intent was that SRAT-defined device-dax instances are registered first. Then any left-over address range with the EFI_MEMORY_SP attribute, but not covered by the SRAT, would have a coarse grained device-dax instance established. However, the scheme to detect what ranges are left to be assigned to a device was buggy and resulted in multiple overlapping device-dax instances. Fix this by using explicit tracking for which ranges have been handled. Now, this new approach may leave memory stranded in the presence of broken platform firmware that fails to fully describe all EFI_MEMORY_SP ranges in the HMAT. That requires a deeper fix if it becomes a problem in practice. Reported-by: "Tallam Mahendra Kumar" Reported-by: Mustafa Hajeer Debugged-by: Vishal Verma Tested-by: Vishal Verma Link: https://lore.kernel.org/r/166890823379.4183293.15333502171004313377.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/device.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ void hmem_register_device(int target_nid, struct resource *r) static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. - */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; From 394164f9d5a3020a7fd719d228386d48d544ec67 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Sun, 24 Jul 2022 09:49:07 +0300 Subject: [PATCH 532/963] net/mlx5: Do not query pci info while pci disabled The driver should not interact with PCI while PCI is disabled. Trying to do so may result in being unable to get vital signs during PCI reset, driver gets timed out and fails to recover. Fixes: fad1783a6d66 ("net/mlx5: Print more info on pci error handlers") Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 283c4cc28944..e58775a7d955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1798,7 +1798,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, res = state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; - mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", + __func__, dev->state, dev->pci_status, res, result2str(res)); return res; } @@ -1837,7 +1838,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); int err; - mlx5_pci_trace(dev, "Enter\n"); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", + __func__, dev->state, dev->pci_status); err = mlx5_pci_enable_device(dev); if (err) { @@ -1859,7 +1861,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) res = PCI_ERS_RESULT_RECOVERED; out: - mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", + __func__, dev->state, dev->pci_status, err, res, result2str(res)); return res; } From 61db3d7b99a367416e489ccf764cc5f9b00d62a1 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 20 Oct 2022 12:25:59 +0300 Subject: [PATCH 533/963] net/mlx5: Fix FW tracer timestamp calculation Fix a bug in calculation of FW tracer timestamp. Decreasing one in the calculation should effect only bits 52_7 and not effect bits 6_0 of the timestamp, otherwise bits 6_0 are always set in this calculation. Fixes: 70dd6fdb8987 ("net/mlx5: FW tracer, parse traces and kernel tracing support") Signed-off-by: Moshe Shemesh Reviewed-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 978a2bb8e122..21831386b26e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer, trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); else - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | (str_frmt->timestamp & MASK_6_0); mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp); From 4f57332d6a551185ba729617f04455e83fbe4e41 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 4 Aug 2022 12:38:41 +0300 Subject: [PATCH 534/963] net/mlx5: SF: Fix probing active SFs during driver probe phase When SF devices and SF port representors are located on different functions, unloading and reloading of SF parent driver doesn't recreate the existing SF present in the device. Fix it by querying SFs and probe active SFs during driver probe phase. Fixes: 90d010b8634b ("net/mlx5: SF, Add auxiliary device support") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/sf/dev/dev.c | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 7da012ff0d41..8e2abbab05f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -18,6 +18,10 @@ struct mlx5_sf_dev_table { phys_addr_t base_address; u64 sf_bar_length; struct notifier_block nb; + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ + struct workqueue_struct *active_wq; + struct work_struct work; + u8 stop_active_wq:1; struct mlx5_core_dev *dev; }; @@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ return 0; sf_index = event->function_id - base_id; + mutex_lock(&table->table_lock); sf_dev = xa_load(&table->devices, sf_index); switch (event->new_vhca_state) { case MLX5_VHCA_STATE_INVALID: @@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_ default: break; } + mutex_unlock(&table->table_lock); return 0; } @@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) return 0; } +static void mlx5_sf_dev_add_active_work(struct work_struct *work) +{ + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + u16 sw_func_id; + int err = 0; + u8 state; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + for (i = 0; i < max_functions; i++, function_id++) { + if (table->stop_active_wq) + return; + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); + if (err) + /* A failure of specific vhca doesn't mean others will + * fail as well. + */ + continue; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state != MLX5_VHCA_STATE_ACTIVE) + continue; + + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); + mutex_lock(&table->table_lock); + /* Don't probe device which is already probe */ + if (!xa_load(&table->devices, i)) + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); + /* There is a race where SF got inactive after the query + * above. e.g.: the query returns that the state of the + * SF is active, and after that the eswitch manager set it to + * inactive. + * This case cannot be managed in SW, since the probing of the + * SF is on one system, and the inactivation is on a different + * system. + * If the inactive is done after the SF perform init_hca(), + * the SF will fully probe and then removed. If it was + * done before init_hca(), the SF probe will fail. + */ + mutex_unlock(&table->table_lock); + } +} + +/* In case SFs are generated externally, probe active SFs */ +static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) +{ + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) + return 0; /* the table is local */ + + /* Use a workqueue to probe active SFs, which are in large + * quantity and may take up to minutes to probe. + */ + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); + if (!table->active_wq) + return -ENOMEM; + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); + queue_work(table->active_wq, &table->work); + return 0; +} + +static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) +{ + if (table->active_wq) { + table->stop_active_wq = true; + destroy_workqueue(table->active_wq); + } +} + void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) { struct mlx5_sf_dev_table *table; @@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) table->base_address = pci_resource_start(dev->pdev, 2); table->max_sfs = max_sfs; xa_init(&table->devices); + mutex_init(&table->table_lock); dev->priv.sf_dev_table = table; err = mlx5_vhca_event_notifier_register(dev, &table->nb); if (err) goto vhca_err; + + err = mlx5_sf_dev_queue_active_work(table); + if (err) + goto add_active_err; + err = mlx5_sf_dev_vhca_arm_all(table); if (err) goto arm_err; @@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) return; arm_err: + mlx5_sf_dev_destroy_active_work(table); +add_active_err: mlx5_vhca_event_notifier_unregister(dev, &table->nb); vhca_err: table->max_sfs = 0; @@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) if (!table) return; + mlx5_sf_dev_destroy_active_work(table); mlx5_vhca_event_notifier_unregister(dev, &table->nb); + mutex_destroy(&table->table_lock); /* Now that event handler is not running, it is safe to destroy * the sf device without race. From 870c2481174b839e7159555127bc8b5a5d0699ba Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 31 May 2022 09:14:03 +0300 Subject: [PATCH 535/963] net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint While moving to new CMD API (quiet API), some pre-existing flows may call the new API function that in case of error, returns the error instead of printing it as previously done. For such flows we bring back the print but to tracepoint this time for sys admins to have the ability to check for errors especially for commands using the new quiet API. Tracepoint output example: devlink-1333 [001] ..... 822.746922: mlx5_cmd: ACCESS_REG(0x805) op_mod(0x0) failed, status bad resource(0x5), syndrome (0xb06e1f), err(-22) Fixes: f23519e542e5 ("net/mlx5: cmdif, Add new api for command execution") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 41 +++++++++-------- .../mellanox/mlx5/core/diag/cmd_tracepoint.h | 45 +++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 3 files changed, 68 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e0d59ca62b5..df3e284ca5c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -45,6 +45,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/tout.h" +#define CREATE_TRACE_POINTS +#include "diag/cmd_tracepoint.h" enum { CMD_IF_REV = 5, @@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err); static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) { u16 opcode, op_mod; - u32 syndrome; - u8 status; u16 uid; - int err; - - syndrome = MLX5_GET(mbox_out, out, syndrome); - status = MLX5_GET(mbox_out, out, status); opcode = MLX5_GET(mbox_in, in, opcode); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - err = cmd_status_to_err(status); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) mlx5_cmd_out_err(dev, opcode, op_mod, out); - else - mlx5_core_dbg(dev, - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", - mlx5_command_str(opcode), opcode, op_mod, uid, - cmd_status_str(status), status, syndrome, err); } int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) @@ -1892,6 +1881,16 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, return err; } +static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) +{ + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); + u8 status = MLX5_GET(mbox_out, out, status); + + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, + cmd_status_str(status), status, syndrome, + cmd_status_to_err(status)); +} + static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { @@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, } /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ -static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) +static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); u8 status = MLX5_GET(mbox_out, out, status); @@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void * if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ err = -EIO; - if (!err && status != MLX5_CMD_STAT_OK) + if (!err && status != MLX5_CMD_STAT_OK) { err = -EREMOTEIO; + mlx5_cmd_err_trace(dev, opcode, op_mod, out); + } cmd_status_log(dev, opcode, status, syndrome, err); return err; @@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); - return err; + return cmd_status_err(dev, err, opcode, op_mod, out); } EXPORT_SYMBOL(mlx5_cmd_do); @@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); - err = cmd_status_err(dev, err, opcode, out); + err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec_polling); @@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_ctx *ctx; ctx = work->ctx; - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); @@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; work->opcode = MLX5_GET(mbox_in, in, opcode); + work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h new file mode 100644 index 000000000000..406ebe17405f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_CMD_TP_H_ + +#include +#include + +TRACE_EVENT(mlx5_cmd, + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, + const char *status_str, u8 status, u32 syndrome, int err), + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), + TP_STRUCT__entry(__string(command_str, command_str) + __field(u16, opcode) + __field(u16, op_mod) + __string(status_str, status_str) + __field(u8, status) + __field(u32, syndrome) + __field(int, err) + ), + TP_fast_assign(__assign_str(command_str, command_str); + __entry->opcode = opcode; + __entry->op_mod = op_mod; + __assign_str(status_str, status_str); + __entry->status = status; + __entry->syndrome = syndrome; + __entry->err = err; + ), + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", + __get_str(command_str), __entry->opcode, __entry->op_mod, + __get_str(status_str), __entry->status, __entry->syndrome, + __entry->err) +); + +#endif /* _MLX5_CMD_TP_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cmd_tracepoint +#include diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af2ceb4160bc..06cbad166225 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -981,6 +981,7 @@ struct mlx5_async_work { struct mlx5_async_ctx *ctx; mlx5_async_cbk_t user_callback; u16 opcode; /* cmd opcode */ + u16 op_mod; /* cmd op_mod */ void *out; /* pointer to the cmd output buffer */ }; From aaf2e65cac7f2e1ae729c2fbc849091df9699f96 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 17 Nov 2022 09:07:20 +0200 Subject: [PATCH 536/963] net/mlx5: Fix handling of entry refcount when command is not issued to FW In case command interface is down, or the command is not allowed, driver did not increment the entry refcount, but might have decrement as part of forced completion handling. Fix that by always increment and decrement the refcount to make it symmetric for all flows. Fixes: 50b2412b7e78 ("net/mlx5: Avoid possible free of command entry while timeout comp handler") Signed-off-by: Eran Ben Elisha Signed-off-by: Moshe Shemesh Reported-by: Jack Wang Tested-by: Jack Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index df3e284ca5c6..74bd05e5dda2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1005,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work) cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* Skip sending command to fw if internal error */ if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { ent->ret = -ENXIO; @@ -1012,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work) return; } - cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1661,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force cmd_ent_put(ent); /* timeout work was canceled */ if (!forced || /* Real FW completion */ - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ + !opcode_allowed(cmd, ent->op)) cmd_ent_put(ent); ent->ts2 = ktime_get_ns(); From 0d4e8ed139d871fcb2844dd71075997753baeec8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 15 Aug 2022 11:25:26 +0300 Subject: [PATCH 537/963] net/mlx5: Lag, avoid lockdep warnings ldev->lock is used to serialize lag change operations. Since multiport eswtich functionality was added, we now change the mode dynamically. However, acquiring ldev->lock is not allowed as it could possibly lead to a deadlock as reported by the lockdep mechanism. [ 836.154963] WARNING: possible circular locking dependency detected [ 836.155850] 5.19.0-rc5_net_56b7df2 #1 Not tainted [ 836.156549] ------------------------------------------------------ [ 836.157418] handler1/12198 is trying to acquire lock: [ 836.158178] ffff888187d52b58 (&ldev->lock){+.+.}-{3:3}, at: mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.159575] [ 836.159575] but task is already holding lock: [ 836.160474] ffff8881d4de2930 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 [ 836.161669] which lock already depends on the new lock. [ 836.162905] [ 836.162905] the existing dependency chain (in reverse order) is: [ 836.164008] -> #3 (&block->cb_lock){++++}-{3:3}: [ 836.164946] down_write+0x25/0x60 [ 836.165548] tcf_block_get_ext+0x1c6/0x5d0 [ 836.166253] ingress_init+0x74/0xa0 [sch_ingress] [ 836.167028] qdisc_create.constprop.0+0x130/0x5e0 [ 836.167805] tc_modify_qdisc+0x481/0x9f0 [ 836.168490] rtnetlink_rcv_msg+0x16e/0x5a0 [ 836.169189] netlink_rcv_skb+0x4e/0xf0 [ 836.169861] netlink_unicast+0x190/0x250 [ 836.170543] netlink_sendmsg+0x243/0x4b0 [ 836.171226] sock_sendmsg+0x33/0x40 [ 836.171860] ____sys_sendmsg+0x1d1/0x1f0 [ 836.172535] ___sys_sendmsg+0xab/0xf0 [ 836.173183] __sys_sendmsg+0x51/0x90 [ 836.173836] do_syscall_64+0x3d/0x90 [ 836.174471] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.175282] [ 836.175282] -> #2 (rtnl_mutex){+.+.}-{3:3}: [ 836.176190] __mutex_lock+0x6b/0xf80 [ 836.176830] register_netdevice_notifier+0x21/0x120 [ 836.177631] rtnetlink_init+0x2d/0x1e9 [ 836.178289] netlink_proto_init+0x163/0x179 [ 836.178994] do_one_initcall+0x63/0x300 [ 836.179672] kernel_init_freeable+0x2cb/0x31b [ 836.180403] kernel_init+0x17/0x140 [ 836.181035] ret_from_fork+0x1f/0x30 [ 836.181687] -> #1 (pernet_ops_rwsem){+.+.}-{3:3}: [ 836.182628] down_write+0x25/0x60 [ 836.183235] unregister_netdevice_notifier+0x1c/0xb0 [ 836.184029] mlx5_ib_roce_cleanup+0x94/0x120 [mlx5_ib] [ 836.184855] __mlx5_ib_remove+0x35/0x60 [mlx5_ib] [ 836.185637] mlx5_eswitch_unregister_vport_reps+0x22f/0x440 [mlx5_core] [ 836.186698] auxiliary_bus_remove+0x18/0x30 [ 836.187409] device_release_driver_internal+0x1f6/0x270 [ 836.188253] bus_remove_device+0xef/0x160 [ 836.188939] device_del+0x18b/0x3f0 [ 836.189562] mlx5_rescan_drivers_locked+0xd6/0x2d0 [mlx5_core] [ 836.190516] mlx5_lag_remove_devices+0x69/0xe0 [mlx5_core] [ 836.191414] mlx5_do_bond_work+0x441/0x620 [mlx5_core] [ 836.192278] process_one_work+0x25c/0x590 [ 836.192963] worker_thread+0x4f/0x3d0 [ 836.193609] kthread+0xcb/0xf0 [ 836.194189] ret_from_fork+0x1f/0x30 [ 836.194826] -> #0 (&ldev->lock){+.+.}-{3:3}: [ 836.195734] __lock_acquire+0x15b8/0x2a10 [ 836.196426] lock_acquire+0xce/0x2d0 [ 836.197057] __mutex_lock+0x6b/0xf80 [ 836.197708] mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.198575] tc_act_parse_mirred+0x25b/0x800 [mlx5_core] [ 836.199467] parse_tc_actions+0x168/0x5a0 [mlx5_core] [ 836.200340] __mlx5e_add_fdb_flow+0x263/0x480 [mlx5_core] [ 836.201241] mlx5e_configure_flower+0x8a0/0x1820 [mlx5_core] [ 836.202187] tc_setup_cb_add+0xd7/0x200 [ 836.202856] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 836.203739] fl_change+0xbbe/0x1730 [cls_flower] [ 836.204501] tc_new_tfilter+0x407/0xd90 [ 836.205168] rtnetlink_rcv_msg+0x406/0x5a0 [ 836.205877] netlink_rcv_skb+0x4e/0xf0 [ 836.206535] netlink_unicast+0x190/0x250 [ 836.207217] netlink_sendmsg+0x243/0x4b0 [ 836.207915] sock_sendmsg+0x33/0x40 [ 836.208538] ____sys_sendmsg+0x1d1/0x1f0 [ 836.209219] ___sys_sendmsg+0xab/0xf0 [ 836.209878] __sys_sendmsg+0x51/0x90 [ 836.210510] do_syscall_64+0x3d/0x90 [ 836.211137] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.211954] other info that might help us debug this: [ 836.213174] Chain exists of: [ 836.213174] &ldev->lock --> rtnl_mutex --> &block->cb_lock 836.214650] Possible unsafe locking scenario: [ 836.214650] [ 836.215574] CPU0 CPU1 [ 836.216255] ---- ---- [ 836.216943] lock(&block->cb_lock); [ 836.217518] lock(rtnl_mutex); [ 836.218348] lock(&block->cb_lock); [ 836.219212] lock(&ldev->lock); [ 836.219758] [ 836.219758] *** DEADLOCK *** [ 836.219758] [ 836.220747] 2 locks held by handler1/12198: [ 836.221390] #0: ffff8881d4de2930 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 [ 836.222646] #1: ffff88810c9a92c0 (&esw->mode_lock){++++}-{3:3}, at: mlx5_esw_hold+0x39/0x50 [mlx5_core] [ 836.224063] stack backtrace: [ 836.224799] CPU: 6 PID: 12198 Comm: handler1 Not tainted 5.19.0-rc5_net_56b7df2 #1 [ 836.225923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 836.227476] Call Trace: [ 836.227929] [ 836.228332] dump_stack_lvl+0x57/0x7d [ 836.228924] check_noncircular+0x104/0x120 [ 836.229562] __lock_acquire+0x15b8/0x2a10 [ 836.230201] lock_acquire+0xce/0x2d0 [ 836.230776] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.231614] ? find_held_lock+0x2b/0x80 [ 836.232221] __mutex_lock+0x6b/0xf80 [ 836.232799] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.233636] ? mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.234451] ? xa_load+0xc3/0x190 [ 836.234995] mlx5_lag_do_mirred+0x3b/0x70 [mlx5_core] [ 836.235803] tc_act_parse_mirred+0x25b/0x800 [mlx5_core] [ 836.236636] ? tc_act_can_offload_mirred+0x135/0x210 [mlx5_core] [ 836.237550] parse_tc_actions+0x168/0x5a0 [mlx5_core] [ 836.238364] __mlx5e_add_fdb_flow+0x263/0x480 [mlx5_core] [ 836.239202] mlx5e_configure_flower+0x8a0/0x1820 [mlx5_core] [ 836.240076] ? lock_acquire+0xce/0x2d0 [ 836.240668] ? tc_setup_cb_add+0x5b/0x200 [ 836.241294] tc_setup_cb_add+0xd7/0x200 [ 836.241917] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 836.242709] fl_change+0xbbe/0x1730 [cls_flower] [ 836.243408] tc_new_tfilter+0x407/0xd90 [ 836.244043] ? tc_del_tfilter+0x880/0x880 [ 836.244672] rtnetlink_rcv_msg+0x406/0x5a0 [ 836.245310] ? netlink_deliver_tap+0x7a/0x4b0 [ 836.245991] ? if_nlmsg_stats_size+0x2b0/0x2b0 [ 836.246675] netlink_rcv_skb+0x4e/0xf0 [ 836.258046] netlink_unicast+0x190/0x250 [ 836.258669] netlink_sendmsg+0x243/0x4b0 [ 836.259288] sock_sendmsg+0x33/0x40 [ 836.259857] ____sys_sendmsg+0x1d1/0x1f0 [ 836.260473] ___sys_sendmsg+0xab/0xf0 [ 836.261064] ? lock_acquire+0xce/0x2d0 [ 836.261669] ? find_held_lock+0x2b/0x80 [ 836.262272] ? __fget_files+0xb9/0x190 [ 836.262871] ? __fget_files+0xd3/0x190 [ 836.263462] __sys_sendmsg+0x51/0x90 [ 836.264064] do_syscall_64+0x3d/0x90 [ 836.264652] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 836.265425] RIP: 0033:0x7fdbe5e2677d [ 836.266012] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 ba ee ff ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 ee ee ff ff 48 [ 836.268485] RSP: 002b:00007fdbe48a75a0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 836.269598] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007fdbe5e2677d [ 836.270576] RDX: 0000000000000000 RSI: 00007fdbe48a7640 RDI: 000000000000003c [ 836.271565] RBP: 00007fdbe48a8368 R08: 0000000000000000 R09: 0000000000000000 [ 836.272546] R10: 00007fdbe48a84b0 R11: 0000000000000293 R12: 0000557bd17dc860 [ 836.273527] R13: 0000000000000000 R14: 0000557bd17dc860 R15: 00007fdbe48a7640 [ 836.274521] To avoid using mode holding ldev->lock in the configure flow, we queue a work to the lag workqueue and cease wait on a completion object. In addition, we remove the lock from mlx5_lag_do_mirred() since it is not really protecting anything. It should be noted that an actual deadlock has not been observed. Signed-off-by: Eli Cohen Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 3 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 14 ++- .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 108 +++++++++++------- .../ethernet/mellanox/mlx5/core/lag/mpesw.h | 1 - 4 files changed, 82 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index a9f4ede4a9bf..be1307a63e6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); - mlx5_lag_mpesw_cleanup(ldev); - cancel_work_sync(&ldev->mpesw_work); destroy_workqueue(ldev->wq); + mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index ce2ce8ccbd70..f30ac2de639f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -50,6 +50,19 @@ struct lag_tracker { enum netdev_lag_hash hash_type; }; +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + /* LAG data of a ConnectX card. * It serves both its phys functions. */ @@ -66,7 +79,6 @@ struct mlx5_lag { struct lag_tracker tracker; struct workqueue_struct *wq; struct delayed_work bond_work; - struct work_struct mpesw_work; struct notifier_block nb; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index f643202b29c6..c17e8f1ec914 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -7,63 +7,95 @@ #include "eswitch.h" #include "lib/mlx5.h" -void mlx5_mpesw_work(struct work_struct *work) +static int add_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int err; - mutex_lock(&ldev->lock); - mlx5_disable_lag(ldev); - mutex_unlock(&ldev->lock); + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) + return 0; + + if (ldev->mode != MLX5_LAG_MODE_NONE) { + err = -EINVAL; + goto out_err; + } + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + if (err) { + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); + goto out_err; + } + + return 0; + +out_err: + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); + return err; } -static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) +static void del_mpesw_rule(struct mlx5_lag *ldev) { - struct mlx5_lag *ldev = dev->priv.lag; - - if (!queue_work(ldev->wq, &ldev->mpesw_work)) - mlx5_core_warn(dev, "failed to queue work\n"); -} - -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev = dev->priv.lag; - - if (!ldev) - return; - - mutex_lock(&ldev->lock); if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && ldev->mode == MLX5_LAG_MODE_MPESW) - mlx5_lag_disable_mpesw(dev); - mutex_unlock(&ldev->lock); + mlx5_disable_lag(ldev); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +static void mlx5_mpesw_work(struct work_struct *work) +{ + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); + struct mlx5_lag *ldev = mpesww->lag; + + mutex_lock(&ldev->lock); + if (mpesww->op == MLX5_MPESW_OP_ENABLE) + mpesww->result = add_mpesw_rule(ldev); + else if (mpesww->op == MLX5_MPESW_OP_DISABLE) + del_mpesw_rule(ldev); + mutex_unlock(&ldev->lock); + + complete(&mpesww->comp); +} + +static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, + enum mpesw_op op) { struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_mpesw_work_st *work; int err = 0; if (!ldev) return 0; - mutex_lock(&ldev->lock); - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - goto out; + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; - if (ldev->mode != MLX5_LAG_MODE_NONE) { + INIT_WORK(&work->work, mlx5_mpesw_work); + init_completion(&work->comp); + work->op = op; + work->lag = ldev; + + if (!queue_work(ldev->wq, &work->work)) { + mlx5_core_warn(dev, "failed to queue mpesw work\n"); err = -EINVAL; goto out; } - - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); - if (err) - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - + wait_for_completion(&work->comp); + err = work->result; out: - mutex_unlock(&ldev->lock); + kfree(work); return err; } +void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +{ + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); +} + +int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +{ + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); +} + int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) { struct mlx5_lag *ldev = mdev->priv.lag; @@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) if (!netif_is_bond_master(out_dev) || !ldev) return 0; - mutex_lock(&ldev->lock); - if (ldev->mode == MLX5_LAG_MODE_MPESW) { - mutex_unlock(&ldev->lock); + if (ldev->mode == MLX5_LAG_MODE_MPESW) return -EOPNOTSUPP; - } - mutex_unlock(&ldev->lock); + return 0; } @@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) { - INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); } void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) { - cancel_delayed_work_sync(&ldev->bond_work); + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index be4abcb8fcd5..88e8daffcf92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -12,7 +12,6 @@ struct lag_mpesw { atomic_t mpesw_rule_count; }; -void mlx5_mpesw_work(struct work_struct *work); int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) From 6d942e40448931be9371f1ba8cb592778807ce18 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Nov 2022 11:10:15 +0200 Subject: [PATCH 538/963] net/mlx5: E-Switch, Set correctly vport destination The cited commit moved from using reformat_id integer to packet_reformat pointer which introduced the possibility to null pointer dereference. When setting packet reformat flag and pkt_reformat pointer must exists so checking MLX5_ESW_DEST_ENCAP is not enough, we need to make sure the pkt_reformat is valid and check for MLX5_ESW_DEST_ENCAP_VALID. If the dest encap valid flag does not exists then pkt_reformat can be either invalid address or null. Also, to make sure we don't try to access invalid pkt_reformat set it to null when invalidated and invalidate it before calling add flow code as its logically more correct and to be safe. Fixes: 2b688ea5efde ("net/mlx5: Add flow steering actions to fs_cmd shim layer") Signed-off-by: Roi Dayan Reviewed-by: Chris Mi Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 10 ++++++---- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5aff97914367..5b6a79d2034e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, list_for_each_entry(flow, flow_list, tmp_list) { if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) continue; - spec = &flow->attr->parse_attr->spec; - - /* update from encap rule to slow path rule */ - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; /* mark the flow's encap dest as non-valid */ esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + + /* update from encap rule to slow path rule */ + spec = &flow->attr->parse_attr->spec; + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -251,6 +252,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, /* we know that the encap is valid */ e->flags &= ~MLX5_ENCAP_ENTRY_VALID; mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); + e->pkt_reformat = NULL; } static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 728ca9f2bb9d..3fda75fe168c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f mlx5_lag_mpesw_is_activated(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } - if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat; From e1ad07b9227f9cbaf4bd2b6ec00b84c303657593 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 29 Oct 2022 09:03:48 +0300 Subject: [PATCH 539/963] net/mlx5: Fix sync reset event handler error flow When sync reset now event handling fails on mlx5_pci_link_toggle() then no reset was done. However, since mlx5_cmd_fast_teardown_hca() was already done, the firmware function is closed and the driver is left without firmware functionality. Fix it by setting device error state and reopen the firmware resources. Reopening is done by the thread that was called for devlink reload fw_activate as it already holds the devlink lock. Fixes: 5ec697446f46 ("net/mlx5: Add support for devlink reload action fw activate") Signed-off-by: Moshe Shemesh Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 9d908a0ccfef..1e46f9afa40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -9,7 +9,8 @@ enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, - MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED }; struct mlx5_fw_reset { @@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) err = mlx5_pci_link_toggle(dev); if (err) { mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); - goto done; + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); } mlx5_enter_error_state(dev, true); @@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) goto out; } err = fw_reset->ret; + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { + mlx5_unload_one_devl_locked(dev); + mlx5_load_one_devl_locked(dev, false); + } out: clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); return err; From 3e874cb1e0a376b0e682f82c1612ae89ab7867d7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2022 08:29:06 +0300 Subject: [PATCH 540/963] net/mlx5e: Fix missing alignment in size of MTT/KLM entries In the cited patch, an alignment required by the HW spec was mistakenly dropped. Bring it back to fix error completions like the below: mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x40b, ci 0x0, qn 0x104f, opcode 0xd, syndrome 0x2, vendor syndrome 0x68 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 86 00 68 02 25 00 10 4f 00 00 bb d2 WQE DUMP: WQ size 1024 WQ cur size 0, WQE index 0x0, len: 192 00000000: 00 00 00 25 00 10 4f 0c 00 00 00 00 00 18 2e 00 00000010: 90 00 00 00 00 02 00 00 00 00 00 00 20 00 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00000080: 08 00 00 00 48 6a 00 02 08 00 00 00 0e 10 00 02 00000090: 08 00 00 00 0c db 00 02 08 00 00 00 0e 82 00 02 000000a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 000000b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fixes: 9f123f740428 ("net/mlx5e: Improve MTT/KSM alignment") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e3a4f01bcceb..5e41dfdf79c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); + u32 sz; - WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); - return entries * umr_entry_size / MLX5_OCTWORD; + return sz / MLX5_OCTWORD; } static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, From f377422044b2093c835e5f3717f8c8c58da1db1f Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 17 Nov 2022 07:45:45 +0200 Subject: [PATCH 541/963] net/mlx5e: Offload rule only when all encaps are valid The cited commit adds a for loop to support multiple encapsulations. But it only checks if the last encap is valid. Fix it by setting slow path flag when one of the encap is invalid. Fixes: f493f15534ec ("net/mlx5e: Move flow attr reformat action bit to per dest flags") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/tc_tun_encap.c | 6 ++---- .../mellanox/mlx5/core/en/tc_tun_encap.h | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 5b6a79d2034e..ff73d25bc6eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -764,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid) + struct net_device **encap_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -880,9 +879,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, if (e->flags & MLX5_ENCAP_ENTRY_VALID) { attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; - *encap_valid = true; } else { - *encap_valid = false; + flow_flag_set(flow, SLOW); } mutex_unlock(&esw->offloads.encap_tbl_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index d542b8476491..8ad273dde40e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, struct net_device *mirred_dev, int out_index, struct netlink_ext_ack *extack, - struct net_device **encap_dev, - bool *encap_valid); + struct net_device **encap_dev); int mlx5e_attach_decap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5a6aa61ec82a..bd9936af4582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1634,7 +1634,6 @@ set_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, struct netlink_ext_ack *extack, - bool *encap_valid, bool *vf_tun) { struct mlx5e_tc_flow_parse_attr *parse_attr; @@ -1651,7 +1650,6 @@ set_encap_dests(struct mlx5e_priv *priv, parse_attr = attr->parse_attr; esw_attr = attr->esw_attr; *vf_tun = false; - *encap_valid = true; for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; @@ -1668,7 +1666,7 @@ set_encap_dests(struct mlx5e_priv *priv, goto out; } err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev, encap_valid); + extack, &encap_dev); dev_put(out_dev); if (err) goto out; @@ -1732,8 +1730,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; - bool vf_tun, encap_valid; u32 max_prio, max_chain; + bool vf_tun; int err = 0; parse_attr = attr->parse_attr; @@ -1823,7 +1821,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -1853,7 +1851,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, * (1) there's no error * (2) there's an encap action and we don't have valid neigh */ - if (!encap_valid || flow_flag_test(flow, SLOW)) + if (flow_flag_test(flow, SLOW)) flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); else flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); @@ -3759,7 +3757,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *next_attr = NULL; struct mlx5e_post_act_handle *handle; - bool vf_tun, encap_valid = true; + bool vf_tun; int err; /* This is going in reverse order as needed. @@ -3781,13 +3779,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (list_is_last(&attr->list, &flow->attrs)) break; - err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); if (err) goto out_free; - if (!encap_valid) - flow_flag_set(flow, SLOW); - err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); if (err) goto out_free; From 11abca031ee34d8d50876e899cb2875d8fac01df Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 14 Nov 2022 11:56:11 +0200 Subject: [PATCH 542/963] net/mlx5e: Remove leftovers from old XSK queues enumeration Before the cited commit, for N channels, a dedicated set of N queues was created to support XSK, in indices [N, 2N-1], doubling the number of queues. In addition, changing the number of channels was prohibited, as it would shift the indices. Remove these two leftovers, as we moved XSK to a new queueing scheme, starting from index 0. Fixes: 3db4c85cde7a ("net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 24aa25da482b..1728e197558d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -35,7 +35,6 @@ #include "en.h" #include "en/port.h" #include "en/params.h" -#include "en/xsk/pool.h" #include "en/ptp.h" #include "lib/clock.h" #include "en/fs_ethtool.h" @@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { mutex_lock(&priv->state_lock); - ch->max_combined = priv->max_nch; ch->combined_count = priv->channels.params.num_channels; - if (priv->xsk.refcnt) { - /* The upper half are XSK queues. */ - ch->max_combined *= 2; - ch->combined_count *= 2; - } - mutex_unlock(&priv->state_lock); } @@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); - /* Don't allow changing the number of channels if there is an active - * XSK, because the numeration of the XSK and regular RQs will change. - */ - if (priv->xsk.refcnt) { - err = -EINVAL; - netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", - __func__); - goto out; - } - /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. From d20a56b0eb006096a023a59efccb27a277b38344 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 30 Oct 2022 11:43:24 +0200 Subject: [PATCH 543/963] net/mlx5e: Fix MACsec SA initialization routine Currently as part of MACsec SA initialization routine extended packet number (EPN) object attribute is always being set without checking if EPN is actually enabled, the above could lead to a NULL dereference. Fix by adding such a check. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/macsec.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 2ef36cb9555a..8f8a735a4501 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - if (is_tx) { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); - key = &ctx->sa.tx_sa->key; - } else { - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - key = &ctx->sa.rx_sa->key; + key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; + + if (sa->epn_state.epn_enabled) { + obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : + cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); + + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); } - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); obj_attrs.replay_window = ctx->secy->replay_window; obj_attrs.replay_protect = ctx->secy->replay_protect; From 94ffd6e0c7dbcffbcded79e283aefbee3499af96 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 30 Oct 2022 11:52:42 +0200 Subject: [PATCH 544/963] net/mlx5e: Fix MACsec update SecY Currently updating SecY destroys and re-creates RX SA objects, the re-created RX SA objects are not identical to the destroyed objects and it disagree on the encryption enabled property which holds the value false after recreation, this value is not supported with offload which leads to no traffic after an update. Fix by recreating an identical objects. Fixes: 5a39816a75e5 ("net/mlx5e: Add MACsec offload SecY support") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 8f8a735a4501..4f96c69c6cc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1155,7 +1155,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx, continue; if (rx_sa->active) { - err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) goto out; } From 8514e325ef016e3fdabaa015ed1adaa6e6d8722a Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 30 Oct 2022 11:19:52 +0200 Subject: [PATCH 545/963] net/mlx5e: Fix possible race condition in macsec extended packet number update routine Currenty extended packet number (EPN) update routine is accessing macsec object without holding the general macsec lock hence facing a possible race condition when an EPN update occurs while updating or deleting the SA. Fix by holding the general macsec lock before accessing the object. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 4f96c69c6cc4..3dc6c987b8da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1536,6 +1536,8 @@ static void macsec_async_event(struct work_struct *work) async_work = container_of(work, struct mlx5e_macsec_async_work, work); macsec = async_work->macsec; + mutex_lock(&macsec->lock); + mdev = async_work->mdev; obj_id = async_work->obj_id; macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); @@ -1557,6 +1559,7 @@ static void macsec_async_event(struct work_struct *work) out_async_work: kfree(async_work); + mutex_unlock(&macsec->lock); } static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data) From 1f0dd412e34e177621769866bef347f0b22364df Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 18 Nov 2022 10:36:35 +0000 Subject: [PATCH 546/963] net: phy: at803x: fix error return code in at803x_probe() Fix to return a negative error code from the ccr read error handling case instead of 0, as done elsewhere in this function. Fixes: 3265f4218878 ("net: phy: at803x: add fiber support") Signed-off-by: Wei Yongjun Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221118103635.254256-1-weiyongjun@huaweicloud.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 349b7b1dbbf2..d49965907561 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -870,8 +870,10 @@ static int at803x_probe(struct phy_device *phydev) .wolopts = 0, }; - if (ccr < 0) + if (ccr < 0) { + ret = ccr; goto err; + } mode_cfg = ccr & AT803X_MODE_CFG_MASK; switch (mode_cfg) { From 0e5d56c64afcd6fd2d132ea972605b66f8a7d3c4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:45:00 -0500 Subject: [PATCH 547/963] tipc: set con sock in tipc_conn_alloc A crash was reported by Wei Chen: BUG: kernel NULL pointer dereference, address: 0000000000000018 RIP: 0010:tipc_conn_close+0x12/0x100 Call Trace: tipc_topsrv_exit_net+0x139/0x320 ops_exit_list.isra.9+0x49/0x80 cleanup_net+0x31a/0x540 process_one_work+0x3fa/0x9f0 worker_thread+0x42/0x5c0 It was caused by !con->sock in tipc_conn_close(). In tipc_topsrv_accept(), con is allocated in conn_idr then its sock is set: con = tipc_conn_alloc(); ... <----[1] con->sock = newsock; If tipc_conn_close() is called in anytime of [1], the null-pointer-def is triggered by con->sock->sk due to con->sock is not yet set. This patch fixes it by moving the con->sock setting to tipc_conn_alloc() under s->idr_lock. So that con->sock can never be NULL when getting the con from s->conn_idr. It will be also safer to move con->server and flag CF_CONNECTED setting under s->idr_lock, as they should all be set before tipc_conn_alloc() is called. Fixes: c5fa7b3cf3cb ("tipc: introduce new TIPC server infrastructure") Reported-by: Wei Chen Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski --- net/tipc/topsrv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index d92ec92f0b71..b0f9aa521670 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -176,7 +176,7 @@ static void tipc_conn_close(struct tipc_conn *con) conn_put(con); } -static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) +static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock) { struct tipc_conn *con; int ret; @@ -202,10 +202,11 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s) } con->conid = ret; s->idr_in_use++; - spin_unlock_bh(&s->idr_lock); set_bit(CF_CONNECTED, &con->flags); con->server = s; + con->sock = sock; + spin_unlock_bh(&s->idr_lock); return con; } @@ -467,7 +468,7 @@ static void tipc_topsrv_accept(struct work_struct *work) ret = kernel_accept(lsock, &newsock, O_NONBLOCK); if (ret < 0) return; - con = tipc_conn_alloc(srv); + con = tipc_conn_alloc(srv, newsock); if (IS_ERR(con)) { ret = PTR_ERR(con); sock_release(newsock); @@ -479,7 +480,6 @@ static void tipc_topsrv_accept(struct work_struct *work) newsk->sk_data_ready = tipc_conn_data_ready; newsk->sk_write_space = tipc_conn_write_space; newsk->sk_user_data = con; - con->sock = newsock; write_unlock_bh(&newsk->sk_callback_lock); /* Wake up receive process in case of 'SYN+' message */ @@ -577,12 +577,11 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, sub.filter = filter; *(u64 *)&sub.usr_handle = (u64)port; - con = tipc_conn_alloc(tipc_topsrv(net)); + con = tipc_conn_alloc(tipc_topsrv(net), NULL); if (IS_ERR(con)) return false; *conid = con->conid; - con->sock = NULL; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); if (rc >= 0) return true; From a7b42969d63f47320853a802efd879fbdc4e010e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:45:01 -0500 Subject: [PATCH 548/963] tipc: add an extra conn_get in tipc_conn_alloc One extra conn_get() is needed in tipc_conn_alloc(), as after tipc_conn_alloc() is called, tipc_conn_close() may free this con before deferencing it in tipc_topsrv_accept(): tipc_conn_alloc(); newsk = newsock->sk; <---- tipc_conn_close(); write_lock_bh(&sk->sk_callback_lock); newsk->sk_data_ready = tipc_conn_data_ready; Then an uaf issue can be triggered: BUG: KASAN: use-after-free in tipc_topsrv_accept+0x1e7/0x370 [tipc] Call Trace: dump_stack_lvl+0x33/0x46 print_report+0x178/0x4b0 kasan_report+0x8c/0x100 kasan_check_range+0x179/0x1e0 tipc_topsrv_accept+0x1e7/0x370 [tipc] process_one_work+0x6a3/0x1030 worker_thread+0x8a/0xdf0 This patch fixes it by holding it in tipc_conn_alloc(), then after all accessing in tipc_topsrv_accept() releasing it. Note when does this in tipc_topsrv_kern_subscr(), as tipc_conn_rcv_sub() returns 0 or -1 only, we don't need to check for "> 0". Fixes: c5fa7b3cf3cb ("tipc: introduce new TIPC server infrastructure") Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski --- net/tipc/topsrv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index b0f9aa521670..e3b427a70398 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -206,6 +206,7 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *s set_bit(CF_CONNECTED, &con->flags); con->server = s; con->sock = sock; + conn_get(con); spin_unlock_bh(&s->idr_lock); return con; @@ -484,6 +485,7 @@ static void tipc_topsrv_accept(struct work_struct *work) /* Wake up receive process in case of 'SYN+' message */ newsk->sk_data_ready(newsk); + conn_put(con); } } @@ -583,10 +585,11 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, *conid = con->conid; rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub); - if (rc >= 0) - return true; + if (rc) + conn_put(con); + conn_put(con); - return false; + return !rc; } void tipc_topsrv_kern_unsubscr(struct net *net, int conid) From cd0f6421162201e4b22ce757a1966729323185eb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 19 Nov 2022 15:28:32 +0800 Subject: [PATCH 549/963] tipc: check skb_linearize() return value in tipc_disc_rcv() If skb_linearize() fails in tipc_disc_rcv(), we need to free the skb instead of handle it. Fixes: 25b0b9c4e835 ("tipc: handle collisions of 32-bit node address hash values") Signed-off-by: YueHaibing Acked-by: Jon Maloy Link: https://lore.kernel.org/r/20221119072832.7896-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/tipc/discover.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index e8630707901e..e8dcdf267c0c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -211,7 +211,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u32 self; int err; - skb_linearize(skb); + if (skb_linearize(skb)) { + kfree_skb(skb); + return; + } hdr = buf_msg(skb); if (caps & TIPC_NODE_ID128) From 30f158740984f9949765f6112456d62d2ca6deba Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 18 Nov 2022 14:27:29 -0800 Subject: [PATCH 550/963] ice: fix handling of burst Tx timestamps Commit 1229b33973c7 ("ice: Add low latency Tx timestamp read") refactored PTP timestamping logic to use a threaded IRQ instead of a separate kthread. This implementation introduced ice_misc_intr_thread_fn and redefined the ice_ptp_process_ts function interface to return a value of whether or not the timestamp processing was complete. ice_misc_intr_thread_fn would take the return value from ice_ptp_process_ts and convert it into either IRQ_HANDLED if there were no more timestamps to be processed, or IRQ_WAKE_THREAD if the thread should continue processing. This is not correct, as the kernel does not re-schedule threaded IRQ functions automatically. IRQ_WAKE_THREAD can only be used by the main IRQ function. This results in the ice_ptp_process_ts function (and in turn the ice_ptp_tx_tstamp function) from only being called exactly once per interrupt. If an application sends a burst of Tx timestamps without waiting for a response, the interrupt will trigger for the first timestamp. However, later timestamps may not have arrived yet. This can result in dropped or discarded timestamps. Worse, on E822 hardware this results in the interrupt logic getting stuck such that no future interrupts will be triggered. The result is complete loss of Tx timestamp functionality. Fix this by modifying the ice_misc_intr_thread_fn to perform its own polling of the ice_ptp_process_ts function. We sleep for a few microseconds between attempts to avoid wasting significant CPU time. The value was chosen to allow time for the Tx timestamps to complete without wasting so much time that we overrun application wait budgets in the worst case. The ice_ptp_process_ts function also currently returns false in the event that the Tx tracker is not initialized. This would result in the threaded IRQ handler never exiting if it gets started while the tracker is not initialized. Fix the function to appropriately return true when the tracker is not initialized. Note that this will not reproduce with default ptp4l behavior, as the program always synchronously waits for a timestamp response before sending another timestamp request. Reported-by: Siddaraju DH Fixes: 1229b33973c7 ("ice: Add low latency Tx timestamp read") Signed-off-by: Jacob Keller Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20221118222729.1565317-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 12 ++++++------ drivers/net/ethernet/intel/ice/ice_ptp.c | 20 ++++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0f6718719453..ca2898467dcb 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3145,15 +3145,15 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) */ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { - irqreturn_t ret = IRQ_HANDLED; struct ice_pf *pf = data; - bool irq_handled; - irq_handled = ice_ptp_process_ts(pf); - if (!irq_handled) - ret = IRQ_WAKE_THREAD; + if (ice_is_reset_in_progress(pf->state)) + return IRQ_HANDLED; - return ret; + while (!ice_ptp_process_ts(pf)) + usleep_range(50, 100); + + return IRQ_HANDLED; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 011b727ab190..0f668468d141 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -614,11 +614,14 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * 2) extend the 40b timestamp value to get a 64bit timestamp * 3) send that timestamp to the stack * - * After looping, if we still have waiting SKBs, return true. This may cause us - * effectively poll even when not strictly necessary. We do this because it's - * possible a new timestamp was requested around the same time as the interrupt. - * In some cases hardware might not interrupt us again when the timestamp is - * captured. + * Returns true if all timestamps were handled, and false if any slots remain + * without a timestamp. + * + * After looping, if we still have waiting SKBs, return false. This may cause + * us effectively poll even when not strictly necessary. We do this because + * it's possible a new timestamp was requested around the same time as the + * interrupt. In some cases hardware might not interrupt us again when the + * timestamp is captured. * * Note that we only take the tracking lock when clearing the bit and when * checking if we need to re-queue this task. The only place where bits can be @@ -641,7 +644,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return false; + return true; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -2381,10 +2384,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) */ bool ice_ptp_process_ts(struct ice_pf *pf) { - if (pf->ptp.port.tx.init) - return ice_ptp_tx_tstamp(&pf->ptp.port.tx); - - return false; + return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } static void ice_ptp_periodic_work(struct kthread_work *work) From 4e45886956a20942800259f326a04417292ae314 Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Sun, 20 Nov 2022 18:57:59 +0800 Subject: [PATCH 551/963] zonefs: Fix race between modprobe and mount There is a race between modprobe and mount as below: modprobe zonefs | mount -t zonefs --------------------------------|------------------------- zonefs_init | register_filesystem [1] | | zonefs_fill_super [2] zonefs_sysfs_init [3] | 1. register zonefs suceess, then 2. user can mount the zonefs 3. if sysfs initialize failed, the module initialize failed. Then the mount process maybe some error happened since the module initialize failed. Let's register zonefs after all dependency resource ready. And reorder the dependency resource release in module exit. Fixes: 9277a6d4fbd4 ("zonefs: Export open zone resource information through sysfs") Signed-off-by: Zhang Xiaoxu Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Signed-off-by: Damien Le Moal --- fs/zonefs/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index abc9a85106f2..f0e8a000f073 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1922,18 +1922,18 @@ static int __init zonefs_init(void) if (ret) return ret; - ret = register_filesystem(&zonefs_type); + ret = zonefs_sysfs_init(); if (ret) goto destroy_inodecache; - ret = zonefs_sysfs_init(); + ret = register_filesystem(&zonefs_type); if (ret) - goto unregister_fs; + goto sysfs_exit; return 0; -unregister_fs: - unregister_filesystem(&zonefs_type); +sysfs_exit: + zonefs_sysfs_exit(); destroy_inodecache: zonefs_destroy_inodecache(); @@ -1942,9 +1942,9 @@ static int __init zonefs_init(void) static void __exit zonefs_exit(void) { + unregister_filesystem(&zonefs_type); zonefs_sysfs_exit(); zonefs_destroy_inodecache(); - unregister_filesystem(&zonefs_type); } MODULE_AUTHOR("Damien Le Moal"); From b97df039a68b2f3e848e238df5d5d06343ea497b Mon Sep 17 00:00:00 2001 From: Thomas Jarosch Date: Wed, 2 Nov 2022 11:18:48 +0100 Subject: [PATCH 552/963] xfrm: Fix oops in __xfrm_state_delete() Kernel 5.14 added a new "byseq" index to speed up xfrm_state lookups by sequence number in commit fe9f1d8779cb ("xfrm: add state hashtable keyed by seq") While the patch was thorough, the function pfkey_send_new_mapping() in net/af_key.c also modifies x->km.seq and never added the current xfrm_state to the "byseq" index. This leads to the following kernel Ooops: BUG: kernel NULL pointer dereference, address: 0000000000000000 .. RIP: 0010:__xfrm_state_delete+0xc9/0x1c0 .. Call Trace: xfrm_state_delete+0x1e/0x40 xfrm_del_sa+0xb0/0x110 [xfrm_user] xfrm_user_rcv_msg+0x12d/0x270 [xfrm_user] ? remove_entity_load_avg+0x8a/0xa0 ? copy_to_user_state_extra+0x580/0x580 [xfrm_user] netlink_rcv_skb+0x51/0x100 xfrm_netlink_rcv+0x30/0x50 [xfrm_user] netlink_unicast+0x1a6/0x270 netlink_sendmsg+0x22a/0x480 __sys_sendto+0x1a6/0x1c0 ? __audit_syscall_entry+0xd8/0x130 ? __audit_syscall_exit+0x249/0x2b0 __x64_sys_sendto+0x23/0x30 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x61/0xcb Exact location of the crash in __xfrm_state_delete(): if (x->km.seq) hlist_del_rcu(&x->byseq); The hlist_node "byseq" was never populated. The bug only triggers if a new NAT traversal mapping (changed IP or port) is detected in esp_input_done2() / esp6_input_done2(), which in turn indirectly calls pfkey_send_new_mapping() *if* the kernel is compiled with CONFIG_NET_KEY and "af_key" is active. The PF_KEYv2 message SADB_X_NAT_T_NEW_MAPPING is not part of RFC 2367. Various implementations have been examined how they handle the "sadb_msg_seq" header field: - racoon (Android): does not process SADB_X_NAT_T_NEW_MAPPING - strongswan: does not care about sadb_msg_seq - openswan: does not care about sadb_msg_seq There is no standard how PF_KEYv2 sadb_msg_seq should be populated for SADB_X_NAT_T_NEW_MAPPING and it's not used in popular implementations either. Herbert Xu suggested we should just use the current km.seq value as is. This fixes the root cause of the oops since we no longer modify km.seq itself. The update of "km.seq" looks like a copy'n'paste error from pfkey_send_acquire(). SADB_ACQUIRE must indeed assign a unique km.seq number according to RFC 2367. It has been verified that code paths involving pfkey_send_acquire() don't cause the same Oops. PF_KEYv2 SADB_X_NAT_T_NEW_MAPPING support was originally added here: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git commit cbc3488685b20e7b2a98ad387a1a816aada569d8 Author: Derek Atkins AuthorDate: Wed Apr 2 13:21:02 2003 -0800 [IPSEC]: Implement UDP Encapsulation framework. In particular, implement ESPinUDP encapsulation for IPsec Nat Traversal. A note on triggering the bug: I was not able to trigger it using VMs. There is one VPN using a high latency link on our production VPN server that triggered it like once a day though. Link: https://github.com/strongswan/strongswan/issues/992 Link: https://lore.kernel.org/netdev/00959f33ee52c4b3b0084d42c430418e502db554.1652340703.git.antony.antony@secunet.com/T/ Link: https://lore.kernel.org/netdev/20221027142455.3975224-1-chenzhihao@meizu.com/T/ Fixes: fe9f1d8779cb ("xfrm: add state hashtable keyed by seq") Reported-by: Roth Mark Reported-by: Zhihao Chen Tested-by: Roth Mark Signed-off-by: Thomas Jarosch Acked-by: Antony Antony Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 213287814328..95edcbedf6ef 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3394,7 +3394,7 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; - hdr->sadb_msg_seq = x->km.seq = get_acqseq(); + hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ From 40781bfb836eda57d19c0baa37c7e72590e05fdc Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Thu, 3 Nov 2022 17:07:13 +0800 Subject: [PATCH 553/963] xfrm: Fix ignored return value in xfrm6_init() When IPv6 module initializing in xfrm6_init(), register_pernet_subsys() is possible to fail but its return value is ignored. If IPv6 initialization fails later and xfrm6_fini() is called, removing uninitialized list in xfrm6_net_ops will cause null-ptr-deref: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 1 PID: 330 Comm: insmod RIP: 0010:unregister_pernet_operations+0xc9/0x450 Call Trace: unregister_pernet_subsys+0x31/0x3e xfrm6_fini+0x16/0x30 [ipv6] ip6_route_init+0xcd/0x128 [ipv6] inet6_init+0x29c/0x602 [ipv6] ... Fix it by catching the error return value of register_pernet_subsys(). Fixes: 8d068875caca ("xfrm: make gc_thresh configurable in all namespaces") Signed-off-by: Chen Zhongjin Reviewed-by: Leon Romanovsky Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_policy.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4a4b0e49ec92..ea435eba3053 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -287,9 +287,13 @@ int __init xfrm6_init(void) if (ret) goto out_state; - register_pernet_subsys(&xfrm6_net_ops); + ret = register_pernet_subsys(&xfrm6_net_ops); + if (ret) + goto out_protocol; out: return ret; +out_protocol: + xfrm6_protocol_fini(); out_state: xfrm6_state_fini(); out_policy: From 2d0b08c157434eebf0b6393c570089666dacf2aa Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 13 Nov 2022 19:58:38 +0100 Subject: [PATCH 554/963] MAINTAINERS: mark rsi wifi driver as orphan Neither Redpine Signals nor Silicon Labs seem to care about proper maintenance of this driver, nor is there any help, documentation or feedback on patches. The driver suffers from various problems and subtle bugs. Mark it as orphaned. Signed-off-by: Marek Vasut Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221113185838.11643-1-marex@denx.de --- MAINTAINERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index e1bc31a6624b..381f66f04e20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17478,10 +17478,8 @@ S: Maintained F: drivers/net/wireless/realtek/rtw89/ REDPINE WIRELESS DRIVER -M: Amitkumar Karwar -M: Siva Rebbagondla L: linux-wireless@vger.kernel.org -S: Maintained +S: Orphan F: drivers/net/wireless/rsi/ REGISTER MAP ABSTRACTION From f6d910a89a2391e5ce1f275d205023880a33d3f8 Mon Sep 17 00:00:00 2001 From: Ankit Patel Date: Tue, 22 Nov 2022 15:35:20 +0800 Subject: [PATCH 555/963] HID: usbhid: Add ALWAYS_POLL quirk for some mice Some additional USB mouse devices are needing ALWAYS_POLL quirk without which they disconnect and reconnect every 60s. Add below devices to the known quirk list. CHERRY VID 0x046a, PID 0x000c MICROSOFT VID 0x045e, PID 0x0783 PRIMAX VID 0x0461, PID 0x4e2a Signed-off-by: Ankit Patel Signed-off-by: Haotien Hsu Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b3dfe8d9e556..8f58c3c1bec3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -274,6 +274,7 @@ #define USB_DEVICE_ID_CH_AXIS_295 0x001c #define USB_VENDOR_ID_CHERRY 0x046a +#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 #define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027 @@ -917,6 +918,7 @@ #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 +#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783 #define USB_VENDOR_ID_MOJO 0x8282 #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 @@ -1382,6 +1384,7 @@ #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 +#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72 #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 50e1c717fc0a..0e9702c7f7d6 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, @@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, @@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, From e541dd7763fc34aec2f93f652a396cc2e7b92d8d Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Fri, 18 Nov 2022 14:24:47 +0800 Subject: [PATCH 556/963] net: wwan: iosm: use ACPI_FREE() but not kfree() in ipc_pcie_read_bios_cfg() acpi_evaluate_dsm() should be coupled with ACPI_FREE() to free the ACPI memory, because we need to track the allocation of acpi_object when ACPI_DBG_TRACK_ALLOCATIONS enabled, so use ACPI_FREE() instead of kfree(). Fixes: d38a648d2d6c ("net: wwan: iosm: fix memory leak in ipc_pcie_read_bios_cfg") Signed-off-by: Wang ShaoBo Link: https://lore.kernel.org/r/20221118062447.2324881-1-bobo.shaobowang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/wwan/iosm/iosm_ipc_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index d3d34d1c4704..5bf5a93937c9 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -249,7 +249,7 @@ static enum ipc_pcie_sleep_state ipc_pcie_read_bios_cfg(struct device *dev) if (object->integer.value == 3) sleep_state = IPC_PCIE_D3L2; - kfree(object); + ACPI_FREE(object); default_ret: return sleep_state; From aad98abd5cb8133507f22654f56bcb443aaa2d89 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 17 Nov 2022 15:50:09 +0800 Subject: [PATCH 557/963] sfc: fix potential memleak in __ef100_hard_start_xmit() The __ef100_hard_start_xmit() returns NETDEV_TX_OK without freeing skb in error handling case, add dev_kfree_skb_any() to fix it. Fixes: 51b35a454efd ("sfc: skeleton EF100 PF driver") Signed-off-by: Zhang Changzhong Acked-by: Martin Habets Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/1668671409-10909-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/sfc/ef100_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 88fa29572e23..ddcc325ed570 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -218,6 +218,7 @@ netdev_tx_t __ef100_hard_start_xmit(struct sk_buff *skb, skb->len, skb->data_len, channel->channel); if (!efx->n_channels || !efx->n_tx_channels || !channel) { netif_stop_queue(net_dev); + dev_kfree_skb_any(skb); goto err; } From 4305fe232b8aa59af3761adc9fe6b6aa40913960 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 17 Nov 2022 20:59:18 +0800 Subject: [PATCH 558/963] net: sparx5: fix error handling in sparx5_port_open() If phylink_of_phy_connect() fails, the port should be disabled. If sparx5_serdes_set()/phy_power_on() fails, the port should be disabled and the phylink should be stopped and disconnected. Fixes: 946e7fd5053a ("net: sparx5: add port module support") Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Liu Jian Tested-by: Bjarni Jonasson Reviewed-by: Steen Hegelund Link: https://lore.kernel.org/r/20221117125918.203997-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/sparx5/sparx5_netdev.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 19516ccad533..d078156581d5 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -104,7 +104,7 @@ static int sparx5_port_open(struct net_device *ndev) err = phylink_of_phy_connect(port->phylink, port->of_node, 0); if (err) { netdev_err(ndev, "Could not attach to PHY\n"); - return err; + goto err_connect; } phylink_start(port->phylink); @@ -116,10 +116,20 @@ static int sparx5_port_open(struct net_device *ndev) err = sparx5_serdes_set(port->sparx5, port, &port->conf); else err = phy_power_on(port->serdes); - if (err) + if (err) { netdev_err(ndev, "%s failed\n", __func__); + goto out_power; + } } + return 0; + +out_power: + phylink_stop(port->phylink); + phylink_disconnect_phy(port->phylink); +err_connect: + sparx5_port_enable(port, false); + return err; } From 8427fd100c7b7793650e212a81e42f1cf124613d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 18 Nov 2022 16:33:03 -0500 Subject: [PATCH 559/963] net: sched: allow act_ct to be built without NF_NAT In commit f11fe1dae1c4 ("net/sched: Make NET_ACT_CT depends on NF_NAT"), it fixed the build failure when NF_NAT is m and NET_ACT_CT is y by adding depends on NF_NAT for NET_ACT_CT. However, it would also cause NET_ACT_CT cannot be built without NF_NAT, which is not expected. This patch fixes it by changing to use "(!NF_NAT || NF_NAT)" as the depend. Fixes: f11fe1dae1c4 ("net/sched: Make NET_ACT_CT depends on NF_NAT") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/b6386f28d1ba34721795fb776a91cbdabb203447.1668807183.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/sched/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1e8ab4749c6c..4662a6ce8a7e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -976,7 +976,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" - depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT && NF_FLOW_TABLE + depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE help Say Y here to allow sending the packets to conntrack module. From 4dbd6a3e90e03130973688fd79e19425f720d999 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 16 Nov 2022 10:41:24 -0800 Subject: [PATCH 560/963] x86/ioremap: Fix page aligned size calculation in __ioremap_caller() Current code re-calculates the size after aligning the starting and ending physical addresses on a page boundary. But the re-calculation also embeds the masking of high order bits that exceed the size of the physical address space (via PHYSICAL_PAGE_MASK). If the masking removes any high order bits, the size calculation results in a huge value that is likely to immediately fail. Fix this by re-calculating the page-aligned size first. Then mask any high order bits using PHYSICAL_PAGE_MASK. Fixes: ffa71f33a820 ("x86, ioremap: Fix incorrect physical address handling in PAE mode") Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Cc: Link: https://lore.kernel.org/r/1668624097-14884-2-git-send-email-mikelley@microsoft.com --- arch/x86/mm/ioremap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 78c5bc654cff..6453fbaedb08 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -217,9 +217,15 @@ __ioremap_caller(resource_size_t phys_addr, unsigned long size, * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PHYSICAL_PAGE_MASK; + phys_addr &= PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; + /* + * Mask out any bits not part of the actual physical + * address, like memory encryption bits. + */ + phys_addr &= PHYSICAL_PAGE_MASK; + retval = memtype_reserve(phys_addr, (u64)phys_addr + size, pcm, &new_pcm); if (retval) { From 53270fb0fd77fe786d8c07a0793981d797836b93 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Fri, 18 Nov 2022 16:24:19 +0800 Subject: [PATCH 561/963] NFC: nci: fix memory leak in nci_rx_data_packet() Syzbot reported a memory leak about skb: unreferenced object 0xffff88810e144e00 (size 240): comm "syz-executor284", pid 3701, jiffies 4294952403 (age 12.620s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __alloc_skb+0x1f9/0x270 net/core/skbuff.c:497 [] alloc_skb include/linux/skbuff.h:1267 [inline] [] virtual_ncidev_write+0x24/0xe0 drivers/nfc/virtual_ncidev.c:116 [] do_loop_readv_writev fs/read_write.c:759 [inline] [] do_loop_readv_writev fs/read_write.c:743 [inline] [] do_iter_write+0x253/0x300 fs/read_write.c:863 [] vfs_writev+0xdd/0x240 fs/read_write.c:934 [] do_writev+0xa6/0x1c0 fs/read_write.c:977 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd In nci_rx_data_packet(), if we don't get a valid conn_info, we will return directly but forget to release the skb. Reported-by: syzbot+cdb9a427d1bc08815104@syzkaller.appspotmail.com Fixes: 4aeee6871e8c ("NFC: nci: Add dynamic logical connections support") Signed-off-by: Liu Shixin Link: https://lore.kernel.org/r/20221118082419.239475-1-liushixin2@huawei.com Signed-off-by: Paolo Abeni --- net/nfc/nci/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index aa5e712adf07..3d36ea5701f0 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -279,8 +279,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_plen(skb->data)); conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); - if (!conn_info) + if (!conn_info) { + kfree_skb(skb); return; + } /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); From db8f91d424fe0ea6db337aca8bc05908bbce1498 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Tue, 22 Nov 2022 12:01:13 +0530 Subject: [PATCH 562/963] ASoC: soc-pcm: Add NULL check in BE reparenting Add NULL check in dpcm_be_reparent API, to handle kernel NULL pointer dereference error. The issue occurred in fuzzing test. Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1669098673-29703-1-git-send-email-quic_srivasam@quicinc.com Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 8ceded22a3c4..35a16c3f9591 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1247,6 +1247,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, return; be_substream = snd_soc_dpcm_get_substream(be, stream); + if (!be_substream) + return; for_each_dpcm_fe(be, stream, dpcm) { if (dpcm->fe == fe) From 3d6c982b26db94cc21bc9f7784f63e8286b7be62 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 20 Nov 2022 23:12:07 +0100 Subject: [PATCH 563/963] regulator: twl6030: re-add TWL6032_SUBCLASS In former times, info->feature was populated via the parent driver by pdata/regulator_init_data->driver_data for all regulators when USB_PRODUCT_ID_LSB indicates a TWL6032. Today, the information is not set, so re-add it at the regulator definitions. Fixes: 25d82337705e2 ("regulator: twl: make driver DT only") Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20221120221208.3093727-2-andreas@kemnade.info Signed-off-by: Mark Brown --- drivers/regulator/twl6030-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 430265c404d6..7c7e3648ea4b 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -530,6 +530,7 @@ static const struct twlreg_info TWL6030_INFO_##label = { \ #define TWL6032_ADJUSTABLE_LDO(label, offset) \ static const struct twlreg_info TWL6032_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ @@ -562,6 +563,7 @@ static const struct twlreg_info TWLFIXED_INFO_##label = { \ #define TWL6032_ADJUSTABLE_SMPS(label, offset) \ static const struct twlreg_info TWLSMPS_INFO_##label = { \ .base = offset, \ + .features = TWL6032_SUBCLASS, \ .desc = { \ .name = #label, \ .id = TWL6032_REG_##label, \ From 31a6297b89aabc81b274c093a308a7f5b55081a7 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Sun, 20 Nov 2022 23:12:08 +0100 Subject: [PATCH 564/963] regulator: twl6030: fix get status of twl6032 regulators Status is reported as always off in the 6032 case. Status reporting now matches the logic in the setters. Once of the differences to the 6030 is that there are no groups, therefore the state needs to be read out in the lower bits. Signed-off-by: Andreas Kemnade Link: https://lore.kernel.org/r/20221120221208.3093727-3-andreas@kemnade.info Signed-off-by: Mark Brown --- drivers/regulator/twl6030-regulator.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c index 7c7e3648ea4b..f3856750944f 100644 --- a/drivers/regulator/twl6030-regulator.c +++ b/drivers/regulator/twl6030-regulator.c @@ -67,6 +67,7 @@ struct twlreg_info { #define TWL6030_CFG_STATE_SLEEP 0x03 #define TWL6030_CFG_STATE_GRP_SHIFT 5 #define TWL6030_CFG_STATE_APP_SHIFT 2 +#define TWL6030_CFG_STATE_MASK 0x03 #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ TWL6030_CFG_STATE_APP_SHIFT) @@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) if (grp < 0) return grp; grp &= P1_GRP_6030; + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val = TWL6030_CFG_STATE_APP(val); } else { + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + val &= TWL6030_CFG_STATE_MASK; grp = 1; } - val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - val = TWL6030_CFG_STATE_APP(val); - return grp && (val == TWL6030_CFG_STATE_ON); } @@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); - switch (TWL6030_CFG_STATE_APP(val)) { + if (info->features & TWL6032_SUBCLASS) + val &= TWL6030_CFG_STATE_MASK; + else + val = TWL6030_CFG_STATE_APP(val); + + switch (val) { case TWL6030_CFG_STATE_ON: return REGULATOR_STATUS_NORMAL; From 3637a29ccbb6461b7268c5c5db525935d510afc6 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Sat, 19 Nov 2022 15:02:02 +0800 Subject: [PATCH 565/963] bnx2x: fix pci device refcount leak in bnx2x_vf_is_pcie_pending() As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). Call pci_dev_put() before returning from bnx2x_vf_is_pcie_pending() to avoid refcount leak. Fixes: b56e9670ffa4 ("bnx2x: Prepare device and initialize VF database") Suggested-by: Jakub Kicinski Signed-off-by: Yang Yingliang Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221119070202.1407648-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 11d15cd03600..77d4cb4ad782 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -795,16 +795,20 @@ static void bnx2x_vf_enable_traffic(struct bnx2x *bp, struct bnx2x_virtf *vf) static u8 bnx2x_vf_is_pcie_pending(struct bnx2x *bp, u8 abs_vfid) { - struct pci_dev *dev; struct bnx2x_virtf *vf = bnx2x_vf_by_abs_fid(bp, abs_vfid); + struct pci_dev *dev; + bool pending; if (!vf) return false; dev = pci_get_domain_bus_and_slot(vf->domain, vf->bus, vf->devfn); - if (dev) - return bnx2x_is_pcie_pending(dev); - return false; + if (!dev) + return false; + pending = bnx2x_is_pcie_pending(dev); + pci_dev_put(dev); + + return pending; } int bnx2x_vf_flr_clnup_epilog(struct bnx2x *bp, u8 abs_vfid) From bb3cfbaf7c6416f3109fdb14f6fc0eb1a50361ad Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Sat, 19 Nov 2022 21:36:16 +0800 Subject: [PATCH 566/963] octeontx2-pf: Remove duplicate MACSEC setting Commit 4581dd480c9e ("net: octeontx2-pf: mcs: consider MACSEC setting") has already added "depends on MACSEC || !MACSEC", so remove it. Signed-off-by: Zheng Bin Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20221119133616.3583538-1-zhengbin13@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 6b4f640163f7..993ac180a5db 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,7 +32,6 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK - depends on MACSEC || !MACSEC depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI From 432e25902b9651622578c6248e549297d03caf66 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sat, 5 Nov 2022 00:05:36 +0800 Subject: [PATCH 567/963] dma-buf: fix racing conflict of dma_heap_add() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Racing conflict could be: task A task B list_for_each_entry strcmp(h->name)) list_for_each_entry strcmp(h->name) kzalloc kzalloc ...... ..... device_create device_create list_add list_add The root cause is that task B has no idea about the fact someone else(A) has inserted heap with same name when it calls list_add, so a potential collision occurs. Fixes: c02a81fba74f ("dma-buf: Add dma-buf heaps framework") Signed-off-by: Dawei Li Acked-by: Andrew Davis Acked-by: Christian König Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/TYCP286MB2323873BBDF88020781FB986CA3B9@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM --- drivers/dma-buf/dma-heap.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 8f5848aa144f..59d158873f4c 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -233,18 +233,6 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) return ERR_PTR(-EINVAL); } - /* check the name is unique */ - mutex_lock(&heap_list_lock); - list_for_each_entry(h, &heap_list, list) { - if (!strcmp(h->name, exp_info->name)) { - mutex_unlock(&heap_list_lock); - pr_err("dma_heap: Already registered heap named %s\n", - exp_info->name); - return ERR_PTR(-EINVAL); - } - } - mutex_unlock(&heap_list_lock); - heap = kzalloc(sizeof(*heap), GFP_KERNEL); if (!heap) return ERR_PTR(-ENOMEM); @@ -283,13 +271,27 @@ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) err_ret = ERR_CAST(dev_ret); goto err2; } - /* Add heap to the list */ + mutex_lock(&heap_list_lock); + /* check the name is unique */ + list_for_each_entry(h, &heap_list, list) { + if (!strcmp(h->name, exp_info->name)) { + mutex_unlock(&heap_list_lock); + pr_err("dma_heap: Already registered heap named %s\n", + exp_info->name); + err_ret = ERR_PTR(-EINVAL); + goto err3; + } + } + + /* Add heap to the list */ list_add(&heap->list, &heap_list); mutex_unlock(&heap_list_lock); return heap; +err3: + device_destroy(dma_heap_class, heap->heap_devt); err2: cdev_del(&heap->heap_cdev); err1: From 534bd70374d646f17e2cebe0e6e4cdd478ce4f0c Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 15 Nov 2022 12:01:58 +0100 Subject: [PATCH 568/963] init/Kconfig: fix CC_HAS_ASM_GOTO_TIED_OUTPUT test with dash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using dash as /bin/sh, the CC_HAS_ASM_GOTO_TIED_OUTPUT test fails with a syntax error which is not the one we are looking for: : In function ‘foo’: :1:29: warning: missing terminating " character :1:29: error: missing terminating " character :2:5: error: expected ‘:’ before ‘+’ token :2:7: warning: missing terminating " character :2:7: error: missing terminating " character :2:5: error: expected declaration or statement at end of input Removing '\n' solves this. Fixes: 1aa0e8b144b6 ("Kconfig: Add option for asm goto w/ tied outputs to workaround clang-13 bug") Signed-off-by: Alexandre Belloni Reviewed-by: Sean Christopherson Signed-off-by: Masahiro Yamada --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index abf65098f1b6..94125d3b6893 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -87,7 +87,7 @@ config CC_HAS_ASM_GOTO_OUTPUT config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. - def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .\n": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) + def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null) config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) From 2dc4ac91f845b690ddf2ad39172c3698b2769fa2 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Sat, 19 Nov 2022 22:18:25 +0100 Subject: [PATCH 569/963] tsnep: Fix rotten packets If PTP synchronisation is done every second, then sporadic the interval is higher than one second: ptp4l[696.582]: master offset -17 s2 freq -1891 path delay 573 ptp4l[697.582]: master offset -22 s2 freq -1901 path delay 573 ptp4l[699.368]: master offset -1 s2 freq -1887 path delay 573 ^^^^^^^ Should be 698.582! This problem is caused by rotten packets, which are received after polling but before interrupts are enabled again. This can be fixed by checking for pending work and rescheduling if necessary after interrupts has been enabled again. Fixes: 403f69bbdbad ("tsnep: Add TSN endpoint Ethernet MAC driver") Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20221119211825.81805-1-gerhard@engleder-embedded.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/engleder/tsnep_main.c | 57 +++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index 48fb391951dd..13d5ff4e0e02 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -542,6 +542,27 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) return (budget != 0); } +static bool tsnep_tx_pending(struct tsnep_tx *tx) +{ + unsigned long flags; + struct tsnep_tx_entry *entry; + bool pending = false; + + spin_lock_irqsave(&tx->lock, flags); + + if (tx->read != tx->write) { + entry = &tx->entry[tx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_TX_DESC_OWNER_MASK) == + (entry->properties & TSNEP_TX_DESC_OWNER_MASK)) + pending = true; + } + + spin_unlock_irqrestore(&tx->lock, flags); + + return pending; +} + static int tsnep_tx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_tx *tx) { @@ -821,6 +842,19 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi, return done; } +static bool tsnep_rx_pending(struct tsnep_rx *rx) +{ + struct tsnep_rx_entry *entry; + + entry = &rx->entry[rx->read]; + if ((__le32_to_cpu(entry->desc_wb->properties) & + TSNEP_DESC_OWNER_COUNTER_MASK) == + (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK)) + return true; + + return false; +} + static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr, int queue_index, struct tsnep_rx *rx) { @@ -866,6 +900,17 @@ static void tsnep_rx_close(struct tsnep_rx *rx) tsnep_rx_ring_cleanup(rx); } +static bool tsnep_pending(struct tsnep_queue *queue) +{ + if (queue->tx && tsnep_tx_pending(queue->tx)) + return true; + + if (queue->rx && tsnep_rx_pending(queue->rx)) + return true; + + return false; +} + static int tsnep_poll(struct napi_struct *napi, int budget) { struct tsnep_queue *queue = container_of(napi, struct tsnep_queue, @@ -886,9 +931,19 @@ static int tsnep_poll(struct napi_struct *napi, int budget) if (!complete) return budget; - if (likely(napi_complete_done(napi, done))) + if (likely(napi_complete_done(napi, done))) { tsnep_enable_irq(queue->adapter, queue->irq_mask); + /* reschedule if work is already pending, prevent rotten packets + * which are transmitted or received after polling but before + * interrupt enable + */ + if (tsnep_pending(queue)) { + tsnep_disable_irq(queue->adapter, queue->irq_mask); + napi_schedule(napi); + } + } + return min(done, budget - 1); } From a6a00d7e8ffd78d1cdb7a43f1278f081038c638f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 Nov 2022 00:27:58 +0900 Subject: [PATCH 570/963] fbcon: Use kzalloc() in fbcon_prepare_logo() A kernel built with syzbot's config file reported that scr_memcpyw(q, save, array3_size(logo_lines, new_cols, 2)) causes uninitialized "save" to be copied. ---------- [drm] Initialized vgem 1.0.0 20120112 for vgem on minor 0 [drm] Initialized vkms 1.0.0 20180514 for vkms on minor 1 Console: switching to colour frame buffer device 128x48 ===================================================== BUG: KMSAN: uninit-value in do_update_region+0x4b8/0xba0 do_update_region+0x4b8/0xba0 update_region+0x40d/0x840 fbcon_switch+0x3364/0x35e0 redraw_screen+0xae3/0x18a0 do_bind_con_driver+0x1cb3/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) Uninit was stored to memory at: fbcon_prepare_logo+0x143b/0x1940 fbcon_init+0x2c1b/0x31c0 visual_init+0x3e7/0x820 do_bind_con_driver+0x14a4/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) Uninit was created at: __kmem_cache_alloc_node+0xb69/0x1020 __kmalloc+0x379/0x680 fbcon_prepare_logo+0x704/0x1940 fbcon_init+0x2c1b/0x31c0 visual_init+0x3e7/0x820 do_bind_con_driver+0x14a4/0x1df0 do_take_over_console+0x11cb/0x13f0 fbcon_fb_registered+0xacc/0xfd0 register_framebuffer+0x1179/0x1320 __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 drm_fbdev_client_hotplug+0xbea/0xda0 drm_fbdev_generic_setup+0x65e/0x9d0 vkms_init+0x9f3/0xc76 (...snipped...) CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc4-00356-g8f2975c2bb4c #924 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 ---------- Signed-off-by: Tetsuo Handa Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/cad03d25-0ea0-32c4-8173-fd1895314bce@I-love.SAKURA.ne.jp --- drivers/video/fbdev/core/fbcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 098b62f7b701..c0143d38df83 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -577,7 +577,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, if (scr_readw(r) != vc->vc_video_erase_char) break; if (r != q && new_rows >= rows + logo_lines) { - save = kmalloc(array3_size(logo_lines, new_cols, 2), + save = kzalloc(array3_size(logo_lines, new_cols, 2), GFP_KERNEL); if (save) { int i = min(cols, new_cols); From 181babf7b4e5050b7a23cb47eb06277405cf132f Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 26 Oct 2022 20:22:40 +0200 Subject: [PATCH 571/963] usb: gadget: uvc: also use try_format in set_format Since e219a712bc06 (usb: gadget: uvc: add v4l2 try_format api call) the try_format function is available. With this function includes checks for valid configurations programmed in the configfs. We use this function to ensure to return valid values on the set_format callback. Signed-off-by: Michael Grzeschik Fixes: e219a712bc06 ("usb: gadget: uvc: add v4l2 try_format api call") Link: https://lore.kernel.org/r/20221026182240.363055-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/uvc_v4l2.c | 72 ++++++++------------------ 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index c4ed48d6b8a4..a189b08bba80 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -199,16 +199,6 @@ uvc_send_response(struct uvc_device *uvc, struct uvc_request_data *data) * V4L2 ioctls */ -struct uvc_format { - u8 bpp; - u32 fcc; -}; - -static struct uvc_format uvc_formats[] = { - { 16, V4L2_PIX_FMT_YUYV }, - { 0, V4L2_PIX_FMT_MJPEG }, -}; - static int uvc_v4l2_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { @@ -242,47 +232,6 @@ uvc_v4l2_get_format(struct file *file, void *fh, struct v4l2_format *fmt) return 0; } -static int -uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) -{ - struct video_device *vdev = video_devdata(file); - struct uvc_device *uvc = video_get_drvdata(vdev); - struct uvc_video *video = &uvc->video; - struct uvc_format *format; - unsigned int imagesize; - unsigned int bpl; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(uvc_formats); ++i) { - format = &uvc_formats[i]; - if (format->fcc == fmt->fmt.pix.pixelformat) - break; - } - - if (i == ARRAY_SIZE(uvc_formats)) { - uvcg_info(&uvc->func, "Unsupported format 0x%08x.\n", - fmt->fmt.pix.pixelformat); - return -EINVAL; - } - - bpl = format->bpp * fmt->fmt.pix.width / 8; - imagesize = bpl ? bpl * fmt->fmt.pix.height : fmt->fmt.pix.sizeimage; - - video->fcc = format->fcc; - video->bpp = format->bpp; - video->width = fmt->fmt.pix.width; - video->height = fmt->fmt.pix.height; - video->imagesize = imagesize; - - fmt->fmt.pix.field = V4L2_FIELD_NONE; - fmt->fmt.pix.bytesperline = bpl; - fmt->fmt.pix.sizeimage = imagesize; - fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; - fmt->fmt.pix.priv = 0; - - return 0; -} - static int uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) { @@ -323,6 +272,27 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) return 0; } +static int +uvc_v4l2_set_format(struct file *file, void *fh, struct v4l2_format *fmt) +{ + struct video_device *vdev = video_devdata(file); + struct uvc_device *uvc = video_get_drvdata(vdev); + struct uvc_video *video = &uvc->video; + int ret; + + ret = uvc_v4l2_try_format(file, fh, fmt); + if (ret) + return ret; + + video->fcc = fmt->fmt.pix.pixelformat; + video->bpp = fmt->fmt.pix.bytesperline * 8 / video->width; + video->width = fmt->fmt.pix.width; + video->height = fmt->fmt.pix.height; + video->imagesize = fmt->fmt.pix.sizeimage; + + return ret; +} + static int uvc_v4l2_enum_frameintervals(struct file *file, void *fh, struct v4l2_frmivalenum *fival) From 3aa07f72894d209fcf922ad686cbb28cf005aaad Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Wed, 9 Nov 2022 17:58:50 -0800 Subject: [PATCH 572/963] usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer If there's a disconnection while operating in eSS, there may be a delay in VBUS drop response from the connector. In that case, the internal link state may drop to operate in usb2 speed while the controller thinks the VBUS is still high. The driver must make sure to disable GUSB2PHYCFG.SUSPHY when sending endpoint command while in usb2 speed. The End Transfer command may be called, and only that command needs to go through at this point. Let's keep it simple and unconditionally disable GUSB2PHYCFG.SUSPHY whenever we issue the command. This scenario is not seen in real hardware. In a rare case, our prototype type-c controller/interface may have a slow response triggerring this issue. Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/5651117207803c26e2f22ddf4e5ce9e865dcf7c7.1668045468.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 026d4029bda6..8b4cfa07539d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, * * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ - if (dwc->gadget->speed <= USB_SPEED_HIGH) { + if (dwc->gadget->speed <= USB_SPEED_HIGH || + DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; From b25264f22b498dff3fa5c70c9bea840e83fff0d1 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Thu, 10 Nov 2022 01:30:05 -0500 Subject: [PATCH 573/963] usb: cdnsp: Fix issue with Clear Feature Halt Endpoint During handling Clear Halt Endpoint Feature request, driver invokes Reset Endpoint command. Because this command has some issue with transition endpoint from Running to Idle state the driver must stop the endpoint by using Stop Endpoint command. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Reviewed-by: Peter Chen Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20221110063005.370656-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-gadget.c | 12 ++++-------- drivers/usb/cdns3/cdnsp-ring.c | 3 ++- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index c67715f6f756..f9aa50ff14d4 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -600,11 +600,11 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, trace_cdnsp_ep_halt(value ? "Set" : "Clear"); - if (value) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; + ret = cdnsp_cmd_stop_ep(pdev, pep); + if (ret) + return ret; + if (value) { if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_STOPPED) { cdnsp_queue_halt_endpoint(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); @@ -613,10 +613,6 @@ int cdnsp_halt_endpoint(struct cdnsp_device *pdev, pep->ep_state |= EP_HALTED; } else { - /* - * In device mode driver can call reset endpoint command - * from any endpoint state. - */ cdnsp_queue_reset_ep(pdev, pep->idx); cdnsp_ring_cmd_db(pdev); ret = cdnsp_wait_for_cmd_compl(pdev); diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 794e413800ae..04dfcaa08dc4 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -2076,7 +2076,8 @@ int cdnsp_cmd_stop_ep(struct cdnsp_device *pdev, struct cdnsp_ep *pep) u32 ep_state = GET_EP_CTX_STATE(pep->out_ctx); int ret = 0; - if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED) { + if (ep_state == EP_STATE_STOPPED || ep_state == EP_STATE_DISABLED || + ep_state == EP_STATE_HALTED) { trace_cdnsp_ep_stopped_or_disabled(pep->out_ctx); goto ep_stopped; } From e0481e5b3cc12ea7ccf4552d41518c89d3509004 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 10 Nov 2022 16:41:31 +0100 Subject: [PATCH 574/963] usb: dwc3: exynos: Fix remove() function The core DWC3 device node was not properly removed by the custom dwc3_exynos_remove_child() function. Replace it with generic of_platform_depopulate() which does that job right. Fixes: adcf20dcd262 ("usb: dwc3: exynos: Use of_platform API to create dwc3 core pdev") Signed-off-by: Marek Szyprowski Acked-by: Thinh Nguyen Cc: stable@vger.kernel.org Reviewed-by: Sam Protsenko Link: https://lore.kernel.org/r/20221110154131.2577-1-m.szyprowski@samsung.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-exynos.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c index 0ecf20eeceee..4be6a873bd07 100644 --- a/drivers/usb/dwc3/dwc3-exynos.c +++ b/drivers/usb/dwc3/dwc3-exynos.c @@ -37,15 +37,6 @@ struct dwc3_exynos { struct regulator *vdd10; }; -static int dwc3_exynos_remove_child(struct device *dev, void *unused) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int dwc3_exynos_probe(struct platform_device *pdev) { struct dwc3_exynos *exynos; @@ -142,7 +133,7 @@ static int dwc3_exynos_remove(struct platform_device *pdev) struct dwc3_exynos *exynos = platform_get_drvdata(pdev); int i; - device_for_each_child(&pdev->dev, NULL, dwc3_exynos_remove_child); + of_platform_depopulate(&pdev->dev); for (i = exynos->num_clks - 1; i >= 0; i--) clk_disable_unprepare(exynos->clks[i]); From f90f5afd5083a7cb4aee13bd4cc0ae600bd381ca Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Tue, 15 Nov 2022 17:19:43 -0800 Subject: [PATCH 575/963] usb: dwc3: gadget: Clear ep descriptor last Until the endpoint is disabled, its descriptors should remain valid. When its requests are removed from ep disable, the request completion routine may attempt to access the endpoint's descriptor. Don't clear the descriptors before that. Fixes: f09ddcfcb8c5 ("usb: dwc3: gadget: Prevent EP queuing while stopping transfers") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/45db7c83b209259115bf652af210f8b2b3b1a383.1668561364.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8b4cfa07539d..6d524fa76443 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1024,12 +1024,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) reg &= ~DWC3_DALEPENA_EP(dep->number); dwc3_writel(dwc->regs, DWC3_DALEPENA, reg); - /* Clear out the ep descriptors for non-ep0 */ - if (dep->number > 1) { - dep->endpoint.comp_desc = NULL; - dep->endpoint.desc = NULL; - } - dwc3_remove_requests(dwc, dep, -ESHUTDOWN); dep->stream_capable = false; @@ -1044,6 +1038,12 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) mask |= (DWC3_EP_DELAY_STOP | DWC3_EP_TRANSFER_STARTED); dep->flags &= mask; + /* Clear out the ep descriptors for non-ep0 */ + if (dep->number > 1) { + dep->endpoint.comp_desc = NULL; + dep->endpoint.desc = NULL; + } + return 0; } From 7a21b27aafa3edead79ed97e6f22236be6b9f447 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 15 Nov 2022 04:22:18 -0500 Subject: [PATCH 576/963] usb: cdnsp: fix issue with ZLP - added TD_SIZE = 1 Patch modifies the TD_SIZE in TRB before ZLP TRB. The TD_SIZE in TRB before ZLP TRB must be set to 1 to force processing ZLP TRB by controller. cc: Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20221115092218.421267-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdnsp-ring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c index 04dfcaa08dc4..2f29431f612e 100644 --- a/drivers/usb/cdns3/cdnsp-ring.c +++ b/drivers/usb/cdns3/cdnsp-ring.c @@ -1763,10 +1763,15 @@ static u32 cdnsp_td_remainder(struct cdnsp_device *pdev, int trb_buff_len, unsigned int td_total_len, struct cdnsp_request *preq, - bool more_trbs_coming) + bool more_trbs_coming, + bool zlp) { u32 maxp, total_packet_count; + /* Before ZLP driver needs set TD_SIZE = 1. */ + if (zlp) + return 1; + /* One TRB with a zero-length data packet. */ if (!more_trbs_coming || (transferred == 0 && trb_buff_len == 0) || trb_buff_len == td_total_len) @@ -1960,7 +1965,8 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) /* Set the TRB length, TD size, and interrupter fields. */ remainder = cdnsp_td_remainder(pdev, enqd_len, trb_buff_len, full_len, preq, - more_trbs_coming); + more_trbs_coming, + zero_len_trb); length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2025,7 +2031,7 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq) if (preq->request.length > 0) { remainder = cdnsp_td_remainder(pdev, 0, preq->request.length, - preq->request.length, preq, 1); + preq->request.length, preq, 1, 0); length_field = TRB_LEN(preq->request.length) | TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0); @@ -2226,7 +2232,7 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev, /* Set the TRB length, TD size, & interrupter fields. */ remainder = cdnsp_td_remainder(pdev, running_total, trb_buff_len, td_len, preq, - more_trbs_coming); + more_trbs_coming, 0); length_field = TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0); From 58e92c4a496b27156020a59a98c7f4a92c2b1533 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 18 Nov 2022 06:38:38 +0000 Subject: [PATCH 577/963] nvmem: rmem: Fix return value check in rmem_read() In case of error, the function memremap() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Fixes: 5a3fa75a4d9c ("nvmem: Add driver to expose reserved memory as nvmem") Cc: Srinivas Kandagatla Cc: Nicolas Saenz Julienne Signed-off-by: Wei Yongjun Acked-by: Nicolas Saenz Julienne Signed-off-by: Yang Yingliang Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063840.6357-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/rmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c index b11c3c974b3d..80cb187f1481 100644 --- a/drivers/nvmem/rmem.c +++ b/drivers/nvmem/rmem.c @@ -37,9 +37,9 @@ static int rmem_read(void *context, unsigned int offset, * but as of Dec 2020 this isn't possible on arm64. */ addr = memremap(priv->mem->base, available, MEMREMAP_WB); - if (IS_ERR(addr)) { + if (!addr) { dev_err(priv->dev, "Failed to remap memory region\n"); - return PTR_ERR(addr); + return -ENOMEM; } count = memory_read_from_buffer(val, bytes, &off, addr, available); From 022b68f271de0e53024e6d5e96fee8e76d25eb95 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 18 Nov 2022 06:38:40 +0000 Subject: [PATCH 578/963] nvmem: lan9662-otp: Change return type of lan9662_otp_wait_flag_clear() The blamed commit introduced the following smatch warning in the function lan9662_otp_wait_flag_clear: drivers/nvmem/lan9662-otpc.c:43 lan9662_otp_wait_flag_clear() warn: signedness bug returning '(-110)' Fix this by changing the return type of the function lan9662_otp_wait_flag_clear() to be int instead of bool. Fixes: 9e8f208ad5229d ("nvmem: lan9662-otp: add support") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Horatiu Vultur Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221118063840.6357-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/lan9662-otpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvmem/lan9662-otpc.c b/drivers/nvmem/lan9662-otpc.c index 377bf34c2946..56fc19f092a7 100644 --- a/drivers/nvmem/lan9662-otpc.c +++ b/drivers/nvmem/lan9662-otpc.c @@ -36,7 +36,7 @@ struct lan9662_otp { void __iomem *base; }; -static bool lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) +static int lan9662_otp_wait_flag_clear(void __iomem *reg, u32 flag) { u32 val; From 96f7baefd92add1868c76b72cc09edf35f666475 Mon Sep 17 00:00:00 2001 From: Dan Vacura Date: Fri, 1 Oct 2021 12:00:19 -0500 Subject: [PATCH 579/963] ANDROID: gki_defconfig: enable CONFIG_USB_CONFIGFS_F_UVC Enable the UVC function driver to allow USB gadgets to connect as a standard video device to a host. Bug: 200712777 Bug: 242344221 Signed-off-by: Dan Vacura Change-Id: Ia037f8560664f9e98f28f3fede609764d5d5699d (cherry picked from commit 8d5dd0a5a458f951f0fdc25aba0cb8329b121d51) (cherry picked from commit 885f16fab68e456b9dc9856641b706ce17551456) --- arch/arm64/configs/gki_defconfig | 1 + arch/x86/configs/gki_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index ffe9362f1cf6..b20f36830abd 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -491,6 +491,7 @@ CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y CONFIG_USB_CONFIGFS_F_UAC2=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_USB_CONFIGFS_F_HID=y +CONFIG_USB_CONFIGFS_F_UVC=y CONFIG_TYPEC=y CONFIG_TYPEC_TCPM=y CONFIG_TYPEC_TCPCI=y diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index f3d4d8a58d45..7104cb48606c 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -449,6 +449,7 @@ CONFIG_USB_CONFIGFS_F_ACC=y CONFIG_USB_CONFIGFS_F_AUDIO_SRC=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_USB_CONFIGFS_F_HID=y +CONFIG_USB_CONFIGFS_F_UVC=y CONFIG_TYPEC=y CONFIG_TYPEC_TCPM=y CONFIG_TYPEC_TCPCI=y From cdcc5ef26b39c3d02d4e69c0352b007ebe438a22 Mon Sep 17 00:00:00 2001 From: Sam Wu Date: Thu, 10 Nov 2022 19:57:32 +0000 Subject: [PATCH 580/963] Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy" This reverts commit 6d5afdc97ea71958287364a1f1d07e59ef151b11. On a Pixel 6 device, it is observed that this commit increases latency by approximately 50ms, or 20%, in migrating a task that requires full CPU utilization from a LITTLE CPU to Fmax on a big CPU. Reverting this change restores the latency back to its original baseline value. Fixes: 6d5afdc97ea7 ("cpufreq: schedutil: Move max CPU capacity to sugov_policy") Signed-off-by: Sam Wu Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9161d1136d01..1207c78f85c1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,9 +25,6 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; - /* max CPU capacity, which is equal for all CPUs in freq. domain */ - unsigned long max; - /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -51,6 +48,7 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; + unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = sg_cpu->iowait_boost * sg_policy->max; - boost >>= SCHED_CAPACITY_SHIFT; + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); - struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), - sg_policy->max); + map_util_perf(sg_cpu->util), sg_cpu->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0; + unsigned long util = 0, max = 1; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); + unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; - util = max(j_sg_cpu->util, util); + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } } - return get_next_freq(sg_policy, util, sg_policy->max); + return get_next_freq(sg_policy, util, max); } static void @@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu = cpumask_first(policy->cpus); + unsigned int cpu; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; - sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); From 919f4557696939625085435ebde09a539de2349c Mon Sep 17 00:00:00 2001 From: Wyes Karny Date: Thu, 17 Nov 2022 15:35:37 +0800 Subject: [PATCH 581/963] cpufreq: amd-pstate: cpufreq: amd-pstate: reset MSR_AMD_PERF_CTL register at init MSR_AMD_PERF_CTL is guaranteed to be 0 on a cold boot. However, on a kexec boot, for instance, it may have a non-zero value (if the cpu was in a non-P0 Pstate). In such cases, the cores with non-P0 Pstates at boot will never be pushed to P0, let alone boost frequencies. Kexec is a common workflow for reboot on Linux and this creates a regression in performance. Fix it by explicitly setting the MSR_AMD_PERF_CTL to 0 during amd_pstate driver init. Cc: All applicable Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Tested-by: Wyes Karny Signed-off-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index ace7d50cf2ac..d844c6f97caf 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -424,12 +424,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) amd_pstate_driver.boost_enabled = true; } +static void amd_perf_ctl_reset(unsigned int cpu) +{ + wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; struct device *dev; struct amd_cpudata *cpudata; + /* + * Resetting PERF_CTL_MSR will put the CPU in P0 frequency, + * which is ideal for initialization process. + */ + amd_perf_ctl_reset(policy->cpu); dev = get_cpu_device(policy->cpu); if (!dev) return -ENODEV; From 456ca88d8a5258fc66edc42a10053ac8473de2b1 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 17 Nov 2022 15:35:38 +0800 Subject: [PATCH 582/963] cpufreq: amd-pstate: change amd-pstate driver to be built-in type Currently when the amd-pstate and acpi_cpufreq are both built into kernel as module driver, amd-pstate will not be loaded by default in this case. Change amd-pstate driver as built-in type, it will resolve the loading sequence problem to allow user to make amd-pstate driver as the default cpufreq scaling driver. Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Tested-by: Wyes Karny Signed-off-by: Perry Yuan Fixes: ec437d71db77 ("cpufreq: amd-pstate: Introduce a new AMD P-State driver to support future processors") Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.x86 | 2 +- drivers/cpufreq/amd-pstate.c | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index 310779b07daf..00476e94db90 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ If in doubt, say N. config X86_AMD_PSTATE - tristate "AMD Processor P-State driver" + bool "AMD Processor P-State driver" depends on X86 && ACPI select ACPI_PROCESSOR select ACPI_CPPC_LIB if X86_64 diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index d844c6f97caf..701f49d6d240 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -663,16 +663,7 @@ static int __init amd_pstate_init(void) return ret; } - -static void __exit amd_pstate_exit(void) -{ - cpufreq_unregister_driver(&amd_pstate_driver); - - amd_pstate_enable(false); -} - -module_init(amd_pstate_init); -module_exit(amd_pstate_exit); +device_initcall(amd_pstate_init); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); From 202e683df37cdf4c38e06e56ac91cc170ef49058 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 17 Nov 2022 15:35:39 +0800 Subject: [PATCH 583/963] cpufreq: amd-pstate: add amd-pstate driver parameter for mode selection When the amd_pstate driver is built-in users still need a method to be able enable or disable it depending upon their circumstance. Add support for an early parameter to do this. There is some performance degradation on a number of ASICs in the passive mode. This performance issue was originally discovered in shared memory systems but it has been proven that certain workloads on MSR systems also suffer performance issues. Set the amd-pstate driver as disabled by default to temporarily mitigate the performance problem. 1) with `amd_pstate=disable`, pstate driver will be disabled to load at kernel booting. 2) with `amd_pstate=passive`, pstate driver will be enabled and loaded as non-autonomous working mode supported in the low-level power management firmware. 3) If neither parameter is specified, the driver will be disabled by default to avoid triggering performance regressions in certain ASICs Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/amd-pstate.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 701f49d6d240..204e39006dda 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,12 +59,8 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ -static bool shared_mem = false; -module_param(shared_mem, bool, 0444); -MODULE_PARM_DESC(shared_mem, - "enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)"); - static struct cpufreq_driver amd_pstate_driver; +static int cppc_load __initdata; static inline int pstate_enable(bool enable) { @@ -626,6 +622,15 @@ static int __init amd_pstate_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return -ENODEV; + /* + * by default the pstate driver is disabled to load + * enable the amd_pstate passive mode driver explicitly + * with amd_pstate=passive in kernel command line + */ + if (!cppc_load) { + pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n"); + return -ENODEV; + } if (!acpi_cpc_valid()) { pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); @@ -640,13 +645,11 @@ static int __init amd_pstate_init(void) if (boot_cpu_has(X86_FEATURE_CPPC)) { pr_debug("AMD CPPC MSR based functionality is supported\n"); amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; - } else if (shared_mem) { + } else { + pr_debug("AMD CPPC shared memory based functionality is supported\n"); static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_init_perf, cppc_init_perf); static_call_update(amd_pstate_update_perf, cppc_update_perf); - } else { - pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n"); - return -ENODEV; } /* enable amd pstate feature */ @@ -665,6 +668,21 @@ static int __init amd_pstate_init(void) } device_initcall(amd_pstate_init); +static int __init amd_pstate_param(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "disable")) { + cppc_load = 0; + pr_info("driver is explicitly disabled\n"); + } else if (!strcmp(str, "passive")) + cppc_load = 1; + + return 0; +} +early_param("amd_pstate", amd_pstate_param); + MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); MODULE_LICENSE("GPL"); From 8a2cbf72a43ade1ed00760f65914e322599fe662 Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 17 Nov 2022 15:35:40 +0800 Subject: [PATCH 584/963] Documentation: amd-pstate: add driver working mode introduction Introduce the `amd_pstate` driver new working mode with `amd_pstate=passive` added to kernel command line. If there is no passive mode enabled by user, amd_pstate driver will be disabled by default for now. Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/amd-pstate.rst | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst index 8f3d30c5a0d8..06e23538f79c 100644 --- a/Documentation/admin-guide/pm/amd-pstate.rst +++ b/Documentation/admin-guide/pm/amd-pstate.rst @@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors. Kernel Module Options for ``amd-pstate`` ========================================= -.. _shared_mem: +Passive Mode +------------ -``shared_mem`` -Use a module param (shared_mem) to enable related processors manually with -**amd_pstate.shared_mem=1**. -Due to the performance issue on the processors with `Shared Memory Support -`_, we disable it presently and will re-enable this by default -once we address performance issue with this solution. +``amd_pstate=passive`` -To check whether the current processor is using `Full MSR Support `_ -or `Shared Memory Support `_ : :: - - ray@hr-test1:~$ lscpu | grep cppc - Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm - -If the CPU flags have ``cppc``, then this processor supports `Full MSR Support -`_. Otherwise, it supports `Shared Memory Support `_. +It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line. +In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC +performance scale as a relative number. This can be expressed as percentage of nominal +performance (infrastructure max). Below the nominal sustained performance level, +desired performance expresses the average performance level of the processor subject +to the Performance Reduction Tolerance register. Above the nominal performance level, +processor must provide at least nominal performance requested and go higher if current +operating conditions allow. ``cpupower`` tool support for ``amd-pstate`` From 1056d314709d0607a22e589c54b1e47e0da57b9d Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Thu, 17 Nov 2022 15:35:41 +0800 Subject: [PATCH 585/963] Documentation: add amd-pstate kernel command line options Add a new amd pstate driver command line option to enable driver passive working mode via MSR and shared memory interface to request desired performance within abstract scale and the power management firmware (SMU) convert the perf requests into actual hardware pstates. Also the `disable` parameter can disable the pstate driver loading by adding `amd_pstate=disable` to kernel command line. Acked-by: Huang Rui Reviewed-by: Gautham R. Shenoy Tested-by: Wyes Karny Signed-off-by: Perry Yuan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a465d5242774..42af9ca0127e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6959,3 +6959,14 @@ memory, and other data can't be written using xmon commands. off xmon is disabled. + + amd_pstate= [X86] + disable + Do not enable amd_pstate as the default + scaling driver for the supported processors + passive + Use amd_pstate as a scaling driver, driver requests a + desired performance on this abstract scale and the power + management firmware translates the requests into actual + hardware states (core frequency, data fabric and memory + clocks etc.) From 11780e37565db4dd064d3243ca68f755c13f65b4 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 22 Nov 2022 00:38:55 +0100 Subject: [PATCH 586/963] pinctrl: meditatek: Startup with the IRQs disabled If the system is restarted via kexec(), the peripherals do not start with a known state. If the previous system had enabled an IRQs we will receive unexected IRQs that can lock the system. [ 28.109251] watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [swapper/0:0] [ 28.109263] Modules linked in: [ 28.109273] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.15.79-14458-g4b9edf7b1ac6 #1 9f2e76613148af94acccd64c609a552fb4b4354b [ 28.109284] Hardware name: Google Elm (DT) [ 28.109290] pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 28.109298] pc : __do_softirq+0xa0/0x388 [ 28.109309] lr : __do_softirq+0x70/0x388 [ 28.109316] sp : ffffffc008003ee0 [ 28.109321] x29: ffffffc008003f00 x28: 000000000000000a x27: 0000000000000080 [ 28.109334] x26: 0000000000000001 x25: ffffffefa7b350c0 x24: ffffffefa7b47480 [ 28.109346] x23: ffffffefa7b3d000 x22: 0000000000000000 x21: ffffffefa7b0fa40 [ 28.109358] x20: ffffffefa7b005b0 x19: ffffffefa7b47480 x18: 0000000000065b6b [ 28.109370] x17: ffffffefa749c8b0 x16: 000000000000018c x15: 00000000000001b8 [ 28.109382] x14: 00000000000d3b6b x13: 0000000000000006 x12: 0000000000057e91 [ 28.109394] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffffefa7b47480 [ 28.109406] x8 : 00000000000000e0 x7 : 000000000f424000 x6 : 0000000000000000 [ 28.109418] x5 : ffffffefa7dfaca0 x4 : ffffffefa7dfadf0 x3 : 000000000000000f [ 28.109429] x2 : 0000000000000000 x1 : 0000000000000100 x0 : 0000000001ac65c5 [ 28.109441] Call trace: [ 28.109447] __do_softirq+0xa0/0x388 [ 28.109454] irq_exit+0xc0/0xe0 [ 28.109464] handle_domain_irq+0x68/0x90 [ 28.109473] gic_handle_irq+0xac/0xf0 [ 28.109480] call_on_irq_stack+0x28/0x50 [ 28.109488] do_interrupt_handler+0x44/0x58 [ 28.109496] el1_interrupt+0x30/0x58 [ 28.109506] el1h_64_irq_handler+0x18/0x24 [ 28.109512] el1h_64_irq+0x7c/0x80 [ 28.109519] arch_local_irq_enable+0xc/0x18 [ 28.109529] default_idle_call+0x40/0x140 [ 28.109539] do_idle+0x108/0x290 [ 28.109547] cpu_startup_entry+0x2c/0x30 [ 28.109554] rest_init+0xe8/0xf8 [ 28.109562] arch_call_rest_init+0x18/0x24 [ 28.109571] start_kernel+0x338/0x42c [ 28.109578] __primary_switched+0xbc/0xc4 [ 28.109588] Kernel panic - not syncing: softlockup: hung tasks Signed-off-by: Ricardo Ribalda Link: https://lore.kernel.org/r/20221122-mtk-pinctrl-v1-1-bedf5655a3d2@chromium.org Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/mtk-eint.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; From 6a66ce44a51bdfc47721f0c591137df2d4b21247 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 22 Nov 2022 20:18:58 +0100 Subject: [PATCH 587/963] netfilter: ipset: restore allowing 64 clashing elements in hash:net,iface The commit 510841da1fcc ("netfilter: ipset: enforce documented limit to prevent allocating huge memory") was too strict and prevented to add up to 64 clashing elements to a hash:net,iface type of set. This patch fixes the issue and now the type behaves as documented. Fixes: 510841da1fcc ("netfilter: ipset: enforce documented limit to prevent allocating huge memory") Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 3adc291d9ce1..7499192af586 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -916,7 +916,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; - else if (h->bucketsize < multi) + else if (h->bucketsize <= multi) h->bucketsize += AHASH_INIT_SIZE; #endif if (n->size >= AHASH_MAX(h)) { From bcd9e3c1656d0f7dd9743598c65c3ae24efb38d0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 21 Nov 2022 19:26:15 +0100 Subject: [PATCH 588/963] netfilter: flowtable_offload: add missing locking nf_flow_table_block_setup and the driver TC_SETUP_FT call can modify the flow block cb list while they are being traversed elsewhere, causing a crash. Add a write lock around the calls to protect readers Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Reported-by: Chad Monroe Signed-off-by: Felix Fietkau Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b04645ced89b..00b522890d77 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1098,6 +1098,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, struct flow_block_cb *block_cb, *next; int err = 0; + down_write(&flowtable->flow_block_lock); switch (cmd) { case FLOW_BLOCK_BIND: list_splice(&bo->cb_list, &flowtable->flow_block.cb_list); @@ -1112,6 +1113,7 @@ static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, WARN_ON_ONCE(1); err = -EOPNOTSUPP; } + up_write(&flowtable->flow_block_lock); return err; } @@ -1168,7 +1170,9 @@ static int nf_flow_table_offload_cmd(struct flow_block_offload *bo, nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable, extack); + down_write(&flowtable->flow_block_lock); err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo); + up_write(&flowtable->flow_block_lock); if (err < 0) return err; From 0a068f4a717f2a68b34452de682accbb1a40bed0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Oct 2022 14:30:19 +0100 Subject: [PATCH 589/963] tracing/hist: add in missing * in comment blocks There are a couple of missing * in comment blocks. Fix these. Cleans up two clang warnings: kernel/trace/trace_events_hist.c:986: warning: bad line: kernel/trace/trace_events_hist.c:3229: warning: bad line: Link: https://lkml.kernel.org/r/20221020133019.1547587-1-colin.i.king@gmail.com Signed-off-by: Colin Ian King Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 48465f7e97b4..087c19548049 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -983,7 +983,7 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, * A trigger can define one or more variables. If any one of them is * currently referenced by any other trigger, this function will * determine that. - + * * Typically used to determine whether or not a trigger can be removed * - if there are any references to a trigger's variables, it cannot. * @@ -3226,7 +3226,7 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, * events. However, for convenience, users are allowed to directly * specify an event field in an action, which will be automatically * converted into a variable on their behalf. - + * * This function creates a field variable with the name var_name on * the hist trigger currently being defined on the target event. If * subsys_name and event_name are specified, this function simply From 4e3c51f4e805291b057d12f5dda5aeb50a538dc4 Mon Sep 17 00:00:00 2001 From: Svyatoslav Feldsherov Date: Tue, 15 Nov 2022 20:20:01 +0000 Subject: [PATCH 590/963] fs: do not update freeing inode i_io_list After commit cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") writeback_single_inode can push inode with I_DIRTY_TIME set to b_dirty_time list. In case of freeing inode with I_DIRTY_TIME set this can happen after deletion of inode from i_io_list at evict. Stack trace is following. evict fat_evict_inode fat_truncate_blocks fat_flush_inodes writeback_inode sync_inode_metadata(inode, sync=0) writeback_single_inode(inode, wbc) <- wbc->sync_mode == WB_SYNC_NONE This will lead to use after free in flusher thread. Similar issue can be triggered if writeback_single_inode in the stack trace update inode->i_io_list. Add explicit check to avoid it. Fixes: cbfecb927f42 ("fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE") Reported-by: syzbot+6ba92bd00d5093f7e371@syzkaller.appspotmail.com Reviewed-by: Jan Kara Signed-off-by: Svyatoslav Feldsherov Link: https://lore.kernel.org/r/20221115202001.324188-1-feldsherov@google.com Signed-off-by: Theodore Ts'o --- fs/fs-writeback.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 443f83382b9b..9958d4020771 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1712,18 +1712,26 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If the inode is now fully clean, then it can be safely removed from - * its writeback list (if any). Otherwise the flusher threads are - * responsible for the writeback lists. + * If the inode is freeing, its i_io_list shoudn't be updated + * as it can be finally deleted at this moment. */ - if (!(inode->i_state & I_DIRTY_ALL)) - inode_cgwb_move_to_attached(inode, wb); - else if (!(inode->i_state & I_SYNC_QUEUED)) { - if ((inode->i_state & I_DIRTY)) - redirty_tail_locked(inode, wb); - else if (inode->i_state & I_DIRTY_TIME) { - inode->dirtied_when = jiffies; - inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + if (!(inode->i_state & I_FREEING)) { + /* + * If the inode is now fully clean, then it can be safely + * removed from its writeback list (if any). Otherwise the + * flusher threads are responsible for the writeback lists. + */ + if (!(inode->i_state & I_DIRTY_ALL)) + inode_cgwb_move_to_attached(inode, wb); + else if (!(inode->i_state & I_SYNC_QUEUED)) { + if ((inode->i_state & I_DIRTY)) + redirty_tail_locked(inode, wb); + else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; + inode_io_list_move_locked(inode, + wb, + &wb->b_dirty_time); + } } } From ff5a19909b49fe5c0b01ae197f84b741e0f698dc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 22 Nov 2022 14:44:11 +0100 Subject: [PATCH 591/963] bus: ixp4xx: Don't touch bit 7 on IXP42x We face some regressions on a few IXP42x systems when accessing flash, the following unrelated error prints appear from the PCI driver: ixp4xx-pci c0000000.pci: PCI: abort_handler addr = 0xff9ffb5f, isr = 0x0, status = 0x22a0 ixp4xx-pci c0000000.pci: imprecise abort (...) It turns out that while bit 7 is masked "reserved" it is not unused, so masking it off as zero is dangerous, and breaks flash access on some systems such as the NSLU2. Be more careful and avoid masking off any of the reserved bits 7, 8, 9 or 30. Only keep masking EXP_WORD (bit 2) on IXP43x which is necessary in some setups. Fixes: 1c953bda90ca ("bus: ixp4xx: Add a driver for IXP4xx expansion bus") Signed-off-by: Linus Walleij Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221122134411.2030372-1-linus.walleij@linaro.org Signed-off-by: Arnd Bergmann --- drivers/bus/intel-ixp4xx-eb.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c index a4388440aca7..91db001eb69a 100644 --- a/drivers/bus/intel-ixp4xx-eb.c +++ b/drivers/bus/intel-ixp4xx-eb.c @@ -49,7 +49,7 @@ #define IXP4XX_EXP_SIZE_SHIFT 10 #define IXP4XX_EXP_CNFG_0 BIT(9) /* Always zero */ #define IXP43X_EXP_SYNC_INTEL BIT(8) /* Only on IXP43x */ -#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x */ +#define IXP43X_EXP_EXP_CHIP BIT(7) /* Only on IXP43x, dangerous to touch on IXP42x */ #define IXP4XX_EXP_BYTE_RD16 BIT(6) #define IXP4XX_EXP_HRDY_POL BIT(5) /* Only on IXP42x */ #define IXP4XX_EXP_MUX_EN BIT(4) @@ -57,8 +57,6 @@ #define IXP4XX_EXP_WORD BIT(2) /* Always zero */ #define IXP4XX_EXP_WR_EN BIT(1) #define IXP4XX_EXP_BYTE_EN BIT(0) -#define IXP42X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(8)|BIT(7)|IXP4XX_EXP_WORD) -#define IXP43X_RESERVED (BIT(30)|IXP4XX_EXP_CNFG_0|BIT(5)|IXP4XX_EXP_WORD) #define IXP4XX_EXP_CNFG0 0x20 #define IXP4XX_EXP_CNFG0_MEM_MAP BIT(31) @@ -252,10 +250,9 @@ static void ixp4xx_exp_setup_chipselect(struct ixp4xx_eb *eb, cs_cfg |= val << IXP4XX_EXP_CYC_TYPE_SHIFT; } - if (eb->is_42x) - cs_cfg &= ~IXP42X_RESERVED; if (eb->is_43x) { - cs_cfg &= ~IXP43X_RESERVED; + /* Should always be zero */ + cs_cfg &= ~IXP4XX_EXP_WORD; /* * This bit for Intel strata flash is currently unused, but let's * report it if we find one. From 22c17e279a1b03bad7987e4a4192b289b890f293 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Nov 2022 08:27:53 +0100 Subject: [PATCH 592/963] blk-mq: fix queue reference leak on blk_mq_alloc_disk_for_queue failure Drop the request queue reference just acquired when __alloc_disk_node failed. Fixes: 6f8191fdf41d ("block: simplify disk shutdown") Reported-by: Al Viro Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20221122072753.426077-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6a789cda68a5..228a6696d835 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4045,9 +4045,14 @@ EXPORT_SYMBOL(__blk_mq_alloc_disk); struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct lock_class_key *lkclass) { + struct gendisk *disk; + if (!blk_get_queue(q)) return NULL; - return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + disk = __alloc_disk_node(q, NUMA_NO_NODE, lkclass); + if (!disk) + blk_put_queue(q); + return disk; } EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); From ccc6e5900745a60073e4967f04b618cdd92b63d6 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Tue, 15 Nov 2022 09:44:45 +0800 Subject: [PATCH 593/963] tracing/user_events: Fix memory leak in user_event_create() Before current_user_event_group(), it has allocated memory and save it in @name, this should freed before return error. Link: https://lkml.kernel.org/r/20221115014445.158419-1-xiujianfeng@huawei.com Fixes: e5d271812e7a ("tracing/user_events: Move pages/locks into groups to prepare for namespaces") Signed-off-by: Xiu Jianfeng Acked-by: Masami Hiramatsu (Google) Acked-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index ae78c2d53c8a..539b08ae7020 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1100,8 +1100,10 @@ static int user_event_create(const char *raw_command) group = current_user_event_group(); - if (!group) + if (!group) { + kfree(name); return -ENOENT; + } mutex_lock(&group->reg_mutex); From 022632f6c43a86f2135642dccd5686de318e861d Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 17 Nov 2022 14:46:17 +0100 Subject: [PATCH 594/963] tracing/osnoise: Fix duration type The duration type is a 64 long value, not an int. This was causing some long noise to report wrong values. Change the duration to a 64 bits value. Link: https://lkml.kernel.org/r/a93d8a8378c7973e9c609de05826533c9e977939.1668692096.git.bristot@kernel.org Cc: stable@vger.kernel.org Cc: Daniel Bristot de Oliveira Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jonathan Corbet Fixes: bce29ac9ce0b ("trace: Add osnoise tracer") Signed-off-by: Daniel Bristot de Oliveira Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 78d536d3ff3d..4300c5dc4e5d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -917,7 +917,7 @@ void osnoise_trace_irq_entry(int id) void osnoise_trace_irq_exit(int id, const char *desc) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1048,7 +1048,7 @@ static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) { struct osnoise_variables *osn_var = this_cpu_osn_var(); - int duration; + s64 duration; if (!osn_var->sampling) return; @@ -1144,7 +1144,7 @@ thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) static void thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) { - int duration; + s64 duration; if (!osn_var->sampling) return; From 60d865bd5a9b15a3961eb1c08bd4155682a3c81e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 21 Nov 2022 10:32:09 +0800 Subject: [PATCH 595/963] of: property: decrement node refcount in of_fwnode_get_reference_args() In of_fwnode_get_reference_args(), the refcount of of_args.np has been incremented in the case of successful return from of_parse_phandle_with_args() or of_parse_phandle_with_fixed_args(). Decrement the refcount if of_args is not returned to the caller of of_fwnode_get_reference_args(). Fixes: 3e3119d3088f ("device property: Introduce fwnode_property_get_reference_args") Signed-off-by: Yang Yingliang Reviewed-by: Sakari Ailus Reviewed-by: Frank Rowand Link: https://lore.kernel.org/r/20221121023209.3909759-1-yangyingliang@huawei.com Signed-off-by: Rob Herring --- drivers/of/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/property.c b/drivers/of/property.c index 967f79b59016..134cfc980b70 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -993,8 +993,10 @@ of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, nargs, index, &of_args); if (ret < 0) return ret; - if (!args) + if (!args) { + of_node_put(of_args.np); return 0; + } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); From f6abcc21d94393801937aed808b8f055ffec8579 Mon Sep 17 00:00:00 2001 From: Shazad Hussain Date: Tue, 15 Nov 2022 20:59:56 +0530 Subject: [PATCH 596/963] clk: qcom: gcc-sc8280xp: add cxo as parent for three ufs ref clks The three UFS reference clocks, gcc_ufs_ref_clkref_clk for external UFS devices, gcc_ufs_card_clkref_clk and gcc_ufs_1_card_clkref_clk for two PHYs are all sourced from CXO. Added parent_data for all three reference clocks described above to reflect that all three clocks are sourced from CXO to have valid frequency for the ref clock needed by UFS controller driver. Fixes: d65d005f9a6c ("clk: qcom: add sc8280xp GCC driver") Link: https://lore.kernel.org/lkml/Y2Tber39cHuOSR%2FW@hovoldconsulting.com/ Signed-off-by: Shazad Hussain Tested-by: Johan Hovold Reviewed-by: Johan Hovold Tested-by: Andrew Halaney Reviewed-by: Andrew Halaney Reviewed-by: Brian Masney Link: https://lore.kernel.org/r/20221115152956.21677-1-quic_shazhuss@quicinc.com Reviewed-by: Bjorn Andersson Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc8280xp.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c index a18ed88f3b82..b3198784e1c3 100644 --- a/drivers/clk/qcom/gcc-sc8280xp.c +++ b/drivers/clk/qcom/gcc-sc8280xp.c @@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_1_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_ref_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, From fa0e381290b134da53e65fb421b65825f23221b4 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 22 Nov 2022 21:20:57 +0800 Subject: [PATCH 597/963] docs/zh_CN/LoongArch: Fix wrong description of FPRs Note The Chinese translation of FPRs Note is not consistent with the original English version, $v0/$v1 should be $fv0/$fv1, $a0/$a1 should be $fa0/$fa1, fix them. Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- Documentation/translations/zh_CN/loongarch/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/translations/zh_CN/loongarch/introduction.rst b/Documentation/translations/zh_CN/loongarch/introduction.rst index 128878f5bb70..f3ec25b163d7 100644 --- a/Documentation/translations/zh_CN/loongarch/introduction.rst +++ b/Documentation/translations/zh_CN/loongarch/introduction.rst @@ -70,8 +70,8 @@ LA64中每个寄存器为64位宽。 ``$r0`` 的内容总是固定为0,而其 ================= ================== =================== ========== .. note:: - 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 - ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。 + 注意:在一些遗留代码中有时可能见到 ``$fv0`` 和 ``$fv1`` ,它们是 + ``$fa0`` 和 ``$fa1`` 的别名,属于已经废弃的用法。 向量寄存器 From f53af4285d775cd9a9a146fc438bd0a1bee1838a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 2 Aug 2022 12:28:11 -0400 Subject: [PATCH 598/963] mm: vmscan: fix extreme overreclaim and swap floods During proactive reclaim, we sometimes observe severe overreclaim, with several thousand times more pages reclaimed than requested. This trace was obtained from shrink_lruvec() during such an instance: prio:0 anon_cost:1141521 file_cost:7767 nr_reclaimed:4387406 nr_to_reclaim:1047 (or_factor:4190) nr=[7161123 345 578 1111] While he reclaimer requested 4M, vmscan reclaimed close to 16G, most of it by swapping. These requests take over a minute, during which the write() to memory.reclaim is unkillably stuck inside the kernel. Digging into the source, this is caused by the proportional reclaim bailout logic. This code tries to resolve a fundamental conflict: to reclaim roughly what was requested, while also aging all LRUs fairly and in accordance to their size, swappiness, refault rates etc. The way it attempts fairness is that once the reclaim goal has been reached, it stops scanning the LRUs with the smaller remaining scan targets, and adjusts the remainder of the bigger LRUs according to how much of the smaller LRUs was scanned. It then finishes scanning that remainder regardless of the reclaim goal. This works fine if priority levels are low and the LRU lists are comparable in size. However, in this instance, the cgroup that is targeted by proactive reclaim has almost no files left - they've already been squeezed out by proactive reclaim earlier - and the remaining anon pages are hot. Anon rotations cause the priority level to drop to 0, which results in reclaim targeting all of anon (a lot) and all of file (almost nothing). By the time reclaim decides to bail, it has scanned most or all of the file target, and therefor must also scan most or all of the enormous anon target. This target is thousands of times larger than the reclaim goal, thus causing the overreclaim. The bailout code hasn't changed in years, why is this failing now? The most likely explanations are two other recent changes in anon reclaim: 1. Before the series starting with commit 5df741963d52 ("mm: fix LRU balancing effect of new transparent huge pages"), the VM was overall relatively reluctant to swap at all, even if swap was configured. This means the LRU balancing code didn't come into play as often as it does now, and mostly in high pressure situations where pronounced swap activity wouldn't be as surprising. 2. For historic reasons, shrink_lruvec() loops on the scan targets of all LRU lists except the active anon one, meaning it would bail if the only remaining pages to scan were active anon - even if there were a lot of them. Before the series starting with commit ccc5dc67340c ("mm/vmscan: make active/inactive ratio as 1:1 for anon lru"), most anon pages would live on the active LRU; the inactive one would contain only a handful of preselected reclaim candidates. After the series, anon gets aged similarly to file, and the inactive list is the default for new anon pages as well, making it often the much bigger list. As a result, the VM is now more likely to actually finish large anon targets than before. Change the code such that only one SWAP_CLUSTER_MAX-sized nudge toward the larger LRU lists is made before bailing out on a met reclaim goal. This fixes the extreme overreclaim problem. Fairness is more subtle and harder to evaluate. No obvious misbehavior was observed on the test workload, in any case. Conceptually, fairness should primarily be a cumulative effect from regular, lower priority scans. Once the VM is in trouble and needs to escalate scan targets to make forward progress, fairness needs to take a backseat. This is also acknowledged by the myriad exceptions in get_scan_count(). This patch makes fairness decrease gradually, as it keeps fairness work static over increasing priority levels with growing scan targets. This should make more sense - although we may have to re-visit the exact values. Link: https://lkml.kernel.org/r/20220802162811.39216-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 04d8b88e5216..0317d4cf4884 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5844,8 +5844,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) enum lru_list lru; unsigned long nr_reclaimed = 0; unsigned long nr_to_reclaim = sc->nr_to_reclaim; + bool proportional_reclaim; struct blk_plug plug; - bool scan_adjusted; if (lru_gen_enabled()) { lru_gen_shrink_lruvec(lruvec, sc); @@ -5868,8 +5868,8 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) * abort proportional reclaim if either the file or anon lru has already * dropped to zero at the first pass. */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); + proportional_reclaim = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); blk_start_plug(&plug); while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || @@ -5889,7 +5889,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) cond_resched(); - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + if (nr_reclaimed < nr_to_reclaim || proportional_reclaim) continue; /* @@ -5940,8 +5940,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) nr_scanned = targets[lru] - nr[lru]; nr[lru] = targets[lru] * (100 - percentage) / 100; nr[lru] -= min(nr[lru], nr_scanned); - - scan_adjusted = true; } blk_finish_plug(&plug); sc->nr_reclaimed += nr_reclaimed; From e031ff96b334a08704d40ef64cd9024d7d83af9b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 8 Nov 2022 10:43:56 -0800 Subject: [PATCH 599/963] mm: khugepaged: allow page allocation fallback to eligible nodes Syzbot reported the below splat: WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 __alloc_pages_node include/linux/gfp.h:221 [inline] WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 hpage_collapse_alloc_page mm/khugepaged.c:807 [inline] WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 alloc_charge_hpage+0x802/0xaa0 mm/khugepaged.c:963 Modules linked in: CPU: 1 PID: 3646 Comm: syz-executor210 Not tainted 6.1.0-rc1-syzkaller-00454-ga70385240892 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:__alloc_pages_node include/linux/gfp.h:221 [inline] RIP: 0010:hpage_collapse_alloc_page mm/khugepaged.c:807 [inline] RIP: 0010:alloc_charge_hpage+0x802/0xaa0 mm/khugepaged.c:963 Code: e5 01 4c 89 ee e8 6e f9 ae ff 4d 85 ed 0f 84 28 fc ff ff e8 70 fc ae ff 48 8d 6b ff 4c 8d 63 07 e9 16 fc ff ff e8 5e fc ae ff <0f> 0b e9 96 fa ff ff 41 bc 1a 00 00 00 e9 86 fd ff ff e8 47 fc ae RSP: 0018:ffffc90003fdf7d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888077f457c0 RSI: ffffffff81cd8f42 RDI: 0000000000000001 RBP: ffff888079388c0c R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f6b48ccf700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6b48a819f0 CR3: 00000000171e7000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: collapse_file+0x1ca/0x5780 mm/khugepaged.c:1715 hpage_collapse_scan_file+0xd6c/0x17a0 mm/khugepaged.c:2156 madvise_collapse+0x53a/0xb40 mm/khugepaged.c:2611 madvise_vma_behavior+0xd0a/0x1cc0 mm/madvise.c:1066 madvise_walk_vmas+0x1c7/0x2b0 mm/madvise.c:1240 do_madvise.part.0+0x24a/0x340 mm/madvise.c:1419 do_madvise mm/madvise.c:1432 [inline] __do_sys_madvise mm/madvise.c:1432 [inline] __se_sys_madvise mm/madvise.c:1430 [inline] __x64_sys_madvise+0x113/0x150 mm/madvise.c:1430 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6b48a4eef9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6b48ccf318 EFLAGS: 00000246 ORIG_RAX: 000000000000001c RAX: ffffffffffffffda RBX: 00007f6b48af0048 RCX: 00007f6b48a4eef9 RDX: 0000000000000019 RSI: 0000000000600003 RDI: 0000000020000000 RBP: 00007f6b48af0040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6b48aa53a4 R13: 00007f6b48bffcbf R14: 00007f6b48ccf400 R15: 0000000000022000 The khugepaged code would pick up the node with the most hit as the preferred node, and also tries to do some balance if several nodes have the same hit record. Basically it does conceptually: * If the target_node <= last_target_node, then iterate from last_target_node + 1 to MAX_NUMNODES (1024 on default config) * If the max_value == node_load[nid], then target_node = nid But there is a corner case, paritucularly for MADV_COLLAPSE, that the non-existing node may be returned as preferred node. Assuming the system has 2 nodes, the target_node is 0 and the last_target_node is 1, if MADV_COLLAPSE path is hit, the max_value may be 0, then it may return 2 for target_node, but it is actually not existing (offline), so the warn is triggered. The node balance was introduced by commit 9f1b868a13ac ("mm: thp: khugepaged: add policy for finding target node") to satisfy "numactl --interleave=all". But interleaving is a mere hint rather than something that has hard requirements. So use nodemask to record the nodes which have the same hit record, the hugepage allocation could fallback to those nodes. And remove __GFP_THISNODE since it does disallow fallback. And if the nodemask just has one node set, it means there is one single node has the most hit record, the nodemask approach actually behaves like __GFP_THISNODE. Link: https://lkml.kernel.org/r/20221108184357.55614-2-shy828301@gmail.com Fixes: 7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse") Signed-off-by: Yang Shi Suggested-by: Zach O'Keefe Suggested-by: Michal Hocko Reviewed-by: Zach O'Keefe Acked-by: Michal Hocko Reported-by: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4734315f7940..52b9cae2412d 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -97,8 +97,8 @@ struct collapse_control { /* Num pages scanned per node */ u32 node_load[MAX_NUMNODES]; - /* Last target selected in hpage_collapse_find_target_node() */ - int last_target_node; + /* nodemask for allocation fallback */ + nodemask_t alloc_nmask; }; /** @@ -734,7 +734,6 @@ static void khugepaged_alloc_sleep(void) struct collapse_control khugepaged_collapse_control = { .is_khugepaged = true, - .last_target_node = NUMA_NO_NODE, }; static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc) @@ -783,16 +782,11 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) target_node = nid; } - /* do some balance if several nodes have the same hit record */ - if (target_node <= cc->last_target_node) - for (nid = cc->last_target_node + 1; nid < MAX_NUMNODES; - nid++) - if (max_value == cc->node_load[nid]) { - target_node = nid; - break; - } + for_each_online_node(nid) { + if (max_value == cc->node_load[nid]) + node_set(nid, cc->alloc_nmask); + } - cc->last_target_node = target_node; return target_node; } #else @@ -802,9 +796,10 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif -static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node) +static bool hpage_collapse_alloc_page(struct page **hpage, gfp_t gfp, int node, + nodemask_t *nmask) { - *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages(gfp, HPAGE_PMD_ORDER, node, nmask); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return false; @@ -955,12 +950,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, struct collapse_control *cc) { - /* Only allocate from the target node */ gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() : - GFP_TRANSHUGE) | __GFP_THISNODE; + GFP_TRANSHUGE); int node = hpage_collapse_find_target_node(cc); - if (!hpage_collapse_alloc_page(hpage, gfp, node)) + if (!hpage_collapse_alloc_page(hpage, gfp, node, &cc->alloc_nmask)) return SCAN_ALLOC_HUGE_PAGE_FAIL; if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp))) return SCAN_CGROUP_CHARGE_FAIL; @@ -1144,6 +1138,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, goto out; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); pte = pte_offset_map_lock(mm, pmd, address, &ptl); for (_address = address, _pte = pte; _pte < pte + HPAGE_PMD_NR; _pte++, _address += PAGE_SIZE) { @@ -2077,6 +2072,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, present = 0; swap = 0; memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); rcu_read_lock(); xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { if (xas_retry(&xas, page)) @@ -2576,7 +2572,6 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, if (!cc) return -ENOMEM; cc->is_khugepaged = false; - cc->last_target_node = NUMA_NO_NODE; mmgrab(mm); lru_add_drain_all(); @@ -2602,6 +2597,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); + nodes_clear(cc->alloc_nmask); if (IS_ENABLED(CONFIG_SHMEM) && vma->vm_file) { struct file *file = get_file(vma->vm_file); pgoff_t pgoff = linear_page_index(vma, addr); From ed86b74874f839f0e579bdf92ea0a5aabdfabebb Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Tue, 8 Nov 2022 10:46:22 +0530 Subject: [PATCH 600/963] mm/page_exit: fix kernel doc warning in page_ext_put() Fix the below compiler warnings reported with 'make W=1 mm/'. mm/page_ext.c:178: warning: Function parameter or member 'page_ext' not described in 'page_ext_put'. [quic_pkondeti@quicinc.com: better patch title] Link: https://lkml.kernel.org/r/1667884582-2465-1-git-send-email-quic_charante@quicinc.com Fixes: b1d5488a252dc9 ("mm: fix use-after free of page_ext after race with memory-offline") Signed-off-by: Charan Teja Kalla Reported-by: Vlastimil Babka Tested-by: Vlastimil Babka Cc: Pavan Kondeti Signed-off-by: Andrew Morton --- mm/page_ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_ext.c b/mm/page_ext.c index affe80243b6d..ddf1968560f0 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -166,7 +166,7 @@ struct page_ext *page_ext_get(struct page *page) /** * page_ext_put() - Working with page extended information is done. - * @page_ext - Page extended information received from page_ext_get(). + * @page_ext: Page extended information received from page_ext_get(). * * The page extended information of the page may not be valid after this * function is called. From 045634ff1e8615714546d9dca92fcdbe0fd898ef Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Wed, 26 Oct 2022 10:15:24 +0530 Subject: [PATCH 601/963] mm/khugepaged: refactor mm_khugepaged_scan_file tracepoint to remove filename from function call Refactor the mm_khugepaged_scan_file tracepoint to move filename dereference to the tracepoint definition, to maintain consistency with other tracepoints[1]. [1]:lore.kernel.org/lkml/20221024111621.3ba17e2c@gandalf.local.home/ Link: https://lkml.kernel.org/r/20221026044524.54793-1-gautammenghani201@gmail.com Fixes: d41fd2016ed07 ("mm/khugepaged: add tracepoint to hpage_collapse_scan_file()") Signed-off-by: Gautam Menghani Reviewed-by: Yang Shi Reviewed-by: Zach O'Keefe Reviewed-by: Steven Rostedt (Google) Cc: David Hildenbrand Cc: Masami Hiramatsu (Google) Signed-off-by: Andrew Morton --- include/trace/events/huge_memory.h | 8 ++++---- mm/khugepaged.c | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 935af4947917..760455dfa860 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -171,15 +171,15 @@ TRACE_EVENT(mm_collapse_huge_page_swapin, TRACE_EVENT(mm_khugepaged_scan_file, - TP_PROTO(struct mm_struct *mm, struct page *page, const char *filename, + TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, int present, int swap, int result), - TP_ARGS(mm, page, filename, present, swap, result), + TP_ARGS(mm, page, file, present, swap, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, pfn) - __string(filename, filename) + __string(filename, file->f_path.dentry->d_iname) __field(int, present) __field(int, swap) __field(int, result) @@ -188,7 +188,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, TP_fast_assign( __entry->mm = mm; __entry->pfn = page ? page_to_pfn(page) : -1; - __assign_str(filename, filename); + __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; __entry->result = result; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 52b9cae2412d..a8d5ef2a77d2 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2153,8 +2153,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } } - trace_mm_khugepaged_scan_file(mm, page, file->f_path.dentry->d_iname, - present, swap, result); + trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); return result; } #else From a6f810efabfd789d3bbafeacb4502958ec56c5ce Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Thu, 10 Nov 2022 00:31:37 +0530 Subject: [PATCH 602/963] gcov: clang: fix the buffer overflow issue Currently, in clang version of gcov code when module is getting removed gcov_info_add() incorrectly adds the sfn_ptr->counter to all the dst->functions and it result in the kernel panic in below crash report. Fix this by properly handling it. [ 8.899094][ T599] Unable to handle kernel write to read-only memory at virtual address ffffff80461cc000 [ 8.899100][ T599] Mem abort info: [ 8.899102][ T599] ESR = 0x9600004f [ 8.899103][ T599] EC = 0x25: DABT (current EL), IL = 32 bits [ 8.899105][ T599] SET = 0, FnV = 0 [ 8.899107][ T599] EA = 0, S1PTW = 0 [ 8.899108][ T599] FSC = 0x0f: level 3 permission fault [ 8.899110][ T599] Data abort info: [ 8.899111][ T599] ISV = 0, ISS = 0x0000004f [ 8.899113][ T599] CM = 0, WnR = 1 [ 8.899114][ T599] swapper pgtable: 4k pages, 39-bit VAs, pgdp=00000000ab8de000 [ 8.899116][ T599] [ffffff80461cc000] pgd=18000009ffcde003, p4d=18000009ffcde003, pud=18000009ffcde003, pmd=18000009ffcad003, pte=00600000c61cc787 [ 8.899124][ T599] Internal error: Oops: 9600004f [#1] PREEMPT SMP [ 8.899265][ T599] Skip md ftrace buffer dump for: 0x1609e0 .... .., [ 8.899544][ T599] CPU: 7 PID: 599 Comm: modprobe Tainted: G S OE 5.15.41-android13-8-g38e9b1af6bce #1 [ 8.899547][ T599] Hardware name: XXX (DT) [ 8.899549][ T599] pstate: 82400005 (Nzcv daif +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [ 8.899551][ T599] pc : gcov_info_add+0x9c/0xb8 [ 8.899557][ T599] lr : gcov_event+0x28c/0x6b8 [ 8.899559][ T599] sp : ffffffc00e733b00 [ 8.899560][ T599] x29: ffffffc00e733b00 x28: ffffffc00e733d30 x27: ffffffe8dc297470 [ 8.899563][ T599] x26: ffffffe8dc297000 x25: ffffffe8dc297000 x24: ffffffe8dc297000 [ 8.899566][ T599] x23: ffffffe8dc0a6200 x22: ffffff880f68bf20 x21: 0000000000000000 [ 8.899569][ T599] x20: ffffff880f68bf00 x19: ffffff8801babc00 x18: ffffffc00d7f9058 [ 8.899572][ T599] x17: 0000000000088793 x16: ffffff80461cbe00 x15: 9100052952800785 [ 8.899575][ T599] x14: 0000000000000200 x13: 0000000000000041 x12: 9100052952800785 [ 8.899577][ T599] x11: ffffffe8dc297000 x10: ffffffe8dc297000 x9 : ffffff80461cbc80 [ 8.899580][ T599] x8 : ffffff8801babe80 x7 : ffffffe8dc2ec000 x6 : ffffffe8dc2ed000 [ 8.899583][ T599] x5 : 000000008020001f x4 : fffffffe2006eae0 x3 : 000000008020001f [ 8.899586][ T599] x2 : ffffff8027c49200 x1 : ffffff8801babc20 x0 : ffffff80461cb3a0 [ 8.899589][ T599] Call trace: [ 8.899590][ T599] gcov_info_add+0x9c/0xb8 [ 8.899592][ T599] gcov_module_notifier+0xbc/0x120 [ 8.899595][ T599] blocking_notifier_call_chain+0xa0/0x11c [ 8.899598][ T599] do_init_module+0x2a8/0x33c [ 8.899600][ T599] load_module+0x23cc/0x261c [ 8.899602][ T599] __arm64_sys_finit_module+0x158/0x194 [ 8.899604][ T599] invoke_syscall+0x94/0x2bc [ 8.899607][ T599] el0_svc_common+0x1d8/0x34c [ 8.899609][ T599] do_el0_svc+0x40/0x54 [ 8.899611][ T599] el0_svc+0x94/0x2f0 [ 8.899613][ T599] el0t_64_sync_handler+0x88/0xec [ 8.899615][ T599] el0t_64_sync+0x1b4/0x1b8 [ 8.899618][ T599] Code: f905f56c f86e69ec f86e6a0f 8b0c01ec (f82e6a0c) [ 8.899620][ T599] ---[ end trace ed5218e9e5b6e2e6 ]--- Link: https://lkml.kernel.org/r/1668020497-13142-1-git-send-email-quic_mojha@quicinc.com Fixes: e178a5beb369 ("gcov: clang support") Signed-off-by: Mukesh Ojha Reviewed-by: Peter Oberparleiter Tested-by: Peter Oberparleiter Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Tom Rix Cc: [5.2+] Signed-off-by: Andrew Morton --- kernel/gcov/clang.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index cbb0bed958ab..7670a811a565 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -280,6 +280,8 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) for (i = 0; i < sfn_ptr->num_counters; i++) dfn_ptr->counters[i] += sfn_ptr->counters[i]; + + sfn_ptr = list_next_entry(sfn_ptr, head); } } From b6305049f30652f1efcf78d627fc6656151a7929 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 14 Nov 2022 13:00:18 -0800 Subject: [PATCH 603/963] ipc/shm: call underlying open/close vm_ops Shared memory segments can be created that are backed by hugetlb pages. When this happens, the vmas associated with any mappings (shmat) are marked VM_HUGETLB, yet the vm_ops for such mappings are provided by ipc/shm (shm_vm_ops). There is a mechanism to call the underlying hugetlb vm_ops, and this is done for most operations. However, it is not done for open and close. This was not an issue until the introduction of the hugetlb vma_lock. This lock structure is pointed to by vm_private_data and the open/close vm_ops help maintain this structure. The special hugetlb routine called at fork took care of structure updates at fork time. However, vma_splitting is not properly handled for ipc shared memory mappings backed by hugetlb pages. This can result in a "kernel NULL pointer dereference" BUG or use after free as two vmas point to the same lock structure. Update the shm open and close routines to always call the underlying open and close routines. Link: https://lkml.kernel.org/r/20221114210018.49346-1-mike.kravetz@oracle.com Fixes: 8d9bfb260814 ("hugetlb: add vma based lock for pmd sharing") Signed-off-by: Mike Kravetz Reported-by: Doug Nelson Reported-by: Cc: Alexander Mikhalitsyn Cc: "Eric W . Biederman" Cc: Manfred Spraul Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Michal Hocko Signed-off-by: Andrew Morton --- ipc/shm.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/ipc/shm.c b/ipc/shm.c index 7d86f058fb86..bd2fcc4d454e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -275,10 +275,8 @@ static inline void shm_rmid(struct shmid_kernel *s) } -static int __shm_open(struct vm_area_struct *vma) +static int __shm_open(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; shp = shm_lock(sfd->ns, sfd->id); @@ -302,7 +300,15 @@ static int __shm_open(struct vm_area_struct *vma) /* This is called by fork, once for every shm attach. */ static void shm_open(struct vm_area_struct *vma) { - int err = __shm_open(vma); + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + int err; + + /* Always call underlying open if present */ + if (sfd->vm_ops->open) + sfd->vm_ops->open(vma); + + err = __shm_open(sfd); /* * We raced in the idr lookup or with shm_destroy(). * Either way, the ID is busted. @@ -359,10 +365,8 @@ static bool shm_may_destroy(struct shmid_kernel *shp) * The descriptor has already been removed from the current->mm->mmap list * and will later be kfree()d. */ -static void shm_close(struct vm_area_struct *vma) +static void __shm_close(struct shm_file_data *sfd) { - struct file *file = vma->vm_file; - struct shm_file_data *sfd = shm_file_data(file); struct shmid_kernel *shp; struct ipc_namespace *ns = sfd->ns; @@ -388,6 +392,18 @@ static void shm_close(struct vm_area_struct *vma) up_write(&shm_ids(ns).rwsem); } +static void shm_close(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + + /* Always call underlying close if present */ + if (sfd->vm_ops->close) + sfd->vm_ops->close(vma); + + __shm_close(sfd); +} + /* Called with ns->shm_ids(ns).rwsem locked */ static int shm_try_destroy_orphaned(int id, void *p, void *data) { @@ -583,13 +599,13 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma) * IPC ID that was removed, and possibly even reused by another shm * segment already. Propagate this case as an error to caller. */ - ret = __shm_open(vma); + ret = __shm_open(sfd); if (ret) return ret; ret = call_mmap(sfd->file, vma); if (ret) { - shm_close(vma); + __shm_close(sfd); return ret; } sfd->vm_ops = vma->vm_ops; From cd08d80ecdac577bad2e8d6805c7a3859fdefb8d Mon Sep 17 00:00:00 2001 From: Li Liguang Date: Mon, 14 Nov 2022 14:48:28 -0500 Subject: [PATCH 604/963] mm: correctly charge compressed memory to its memcg Kswapd will reclaim memory when memory pressure is high, the annonymous memory will be compressed and stored in the zpool if zswap is enabled. The memcg_kmem_bypass() in get_obj_cgroup_from_page() will bypass the kernel thread and cause the compressed memory not be charged to its memory cgroup. Remove the memcg_kmem_bypass() call and properly charge compressed memory to its corresponding memory cgroup. Link: https://lore.kernel.org/linux-mm/CALvZod4nnn8BHYqAM4xtcR0Ddo2-Wr8uKm9h_CHWUaXw7g_DCg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20221114194828.100822-1-hannes@cmpxchg.org Fixes: f4840ccfca25 ("zswap: memcg accounting") Signed-off-by: Li Liguang Signed-off-by: Johannes Weiner Acked-by: Shakeel Butt Reviewed-by: Muchun Song Cc: Michal Hocko Cc: Roman Gushchin Cc: [5.19+] Signed-off-by: Andrew Morton --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2d8549ae1b30..a1a35c12635e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3026,7 +3026,7 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) { struct obj_cgroup *objcg; - if (!memcg_kmem_enabled() || memcg_kmem_bypass()) + if (!memcg_kmem_enabled()) return NULL; if (PageMemcgKmem(page)) { From 4a955bed882e734807024afd8f53213d4c61ff97 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 14 Nov 2022 22:55:37 +1100 Subject: [PATCH 605/963] mm/memory: return vm_fault_t result from migrate_to_ram() callback The migrate_to_ram() callback should always succeed, but in rare cases can fail usually returning VM_FAULT_SIGBUS. Commit 16ce101db85d ("mm/memory.c: fix race when faulting a device private page") incorrectly stopped passing the return code up the stack. Fix this by setting the ret variable, restoring the previous behaviour on migrate_to_ram() failure. Link: https://lkml.kernel.org/r/20221114115537.727371-1-apopple@nvidia.com Fixes: 16ce101db85d ("mm/memory.c: fix race when faulting a device private page") Signed-off-by: Alistair Popple Acked-by: David Hildenbrand Reviewed-by: Felix Kuehling Cc: Ralph Campbell Cc: John Hubbard Cc: Alex Sierra Cc: Ben Skeggs Cc: Lyude Paul Cc: Jason Gunthorpe Cc: Michael Ellerman Signed-off-by: Andrew Morton --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index f88c351aecd4..8a6d5c823f91 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3763,7 +3763,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) */ get_page(vmf->page); pte_unmap_unlock(vmf->pte, vmf->ptl); - vmf->page->pgmap->ops->migrate_to_ram(vmf); + ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; From 8468b486612c808c9e337708d66a435498f1735c Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Nov 2022 17:55:52 +0000 Subject: [PATCH 606/963] mm/damon/sysfs-schemes: skip stats update if the scheme directory is removed A DAMON sysfs interface user can start DAMON with a scheme, remove the sysfs directory for the scheme, and then ask update of the scheme's stats. Because the schemes stats update logic isn't aware of the situation, it results in an invalid memory access. Fix the bug by checking if the scheme sysfs directory exists. Link: https://lkml.kernel.org/r/20221114175552.1951-1-sj@kernel.org Fixes: 0ac32b8affb5 ("mm/damon/sysfs: support DAMOS stats") Signed-off-by: SeongJae Park Cc: [v5.18] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 9f1219a67e3f..5ce403378c20 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2339,6 +2339,10 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond) damon_for_each_scheme(scheme, ctx) { struct damon_sysfs_stats *sysfs_stats; + /* user could have removed the scheme sysfs dir */ + if (schemes_idx >= sysfs_schemes->nr) + break; + sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats; sysfs_stats->nr_tried = scheme->stat.nr_tried; sysfs_stats->sz_tried = scheme->stat.sz_tried; From 4a42344081ff7fbb890c0741e11d22cd7f658894 Mon Sep 17 00:00:00 2001 From: Ian Cowan Date: Sun, 13 Nov 2022 19:33:49 -0500 Subject: [PATCH 607/963] mm: mmap: fix documentation for vma_mas_szero When the struct_mm input, mm, was changed to a struct ma_state, mas, the documentation for the function was never updated. This updates that documentation reference. Link: https://lkml.kernel.org/r/20221114003349.41235-1-ian@linux.cowan.aero Signed-off-by: Ian Cowan Acked-by: David Hildenbrand Cc: Liam Howlett Signed-off-by: Andrew Morton --- mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mmap.c b/mm/mmap.c index c3c5c1d6103d..74a84eb33b90 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -456,7 +456,7 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) * vma_mas_szero() - Set a given range to zero. Used when modifying a * vm_area_struct start or end. * - * @mm: The struct_mm + * @mas: The maple tree ma_state * @start: The start address to zero * @end: The end address to zero. */ From d39e2ad63def4432ed0ec59c0e64fee988ef42f0 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sun, 13 Nov 2022 17:13:01 -0700 Subject: [PATCH 608/963] mailmap: update Alex Hung's email address I am no longer at Canonical and add entry of my personal email address. Link: https://lkml.kernel.org/r/20221114001302.671897-1-alex.hung@amd.com Signed-off-by: Alex Hung Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index fdd7989492fc..f9e4472b891a 100644 --- a/.mailmap +++ b/.mailmap @@ -29,6 +29,7 @@ Alexandre Belloni Alexei Starovoitov Alexei Starovoitov +Alex Hung Alex Shi Alex Shi Alex Shi From db8e0998c35af67bceee91cad8acf3f6f330782f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Sun, 13 Nov 2022 17:13:02 -0700 Subject: [PATCH 609/963] MAINTAINERS: update Alex Hung's email address Use my personal email address. Link: https://lkml.kernel.org/r/20221114001302.671897-2-alex.hung@amd.com Signed-off-by: Alex Hung Signed-off-by: Andrew Morton --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 046ff06ff97f..0e152a7afda8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10288,7 +10288,7 @@ T: git https://github.com/intel/gvt-linux.git F: drivers/gpu/drm/i915/gvt/ INTEL HID EVENT DRIVER -M: Alex Hung +M: Alex Hung L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/hid.c From 50c697215a8cc22f0e58c88f06f2716c05a26e85 Mon Sep 17 00:00:00 2001 From: Sam James Date: Wed, 16 Nov 2022 18:26:34 +0000 Subject: [PATCH 610/963] kbuild: fix -Wimplicit-function-declaration in license_is_gpl_compatible Add missing include for strcmp. Clang 16 makes -Wimplicit-function-declaration an error by default. Unfortunately, out of tree modules may use this in configure scripts, which means failure might cause silent miscompilation or misconfiguration. For more information, see LWN.net [0] or LLVM's Discourse [1], gentoo-dev@ [2], or the (new) c-std-porting mailing list [3]. [0] https://lwn.net/Articles/913505/ [1] https://discourse.llvm.org/t/configure-script-breakage-with-the-new-werror-implicit-function-declaration/65213 [2] https://archives.gentoo.org/gentoo-dev/message/dd9f2d3082b8b6f8dfbccb0639e6e240 [3] hosted at lists.linux.dev. [akpm@linux-foundation.org: remember "linux/"] Link: https://lkml.kernel.org/r/20221116182634.2823136-1-sam@gentoo.org Signed-off-by: Sam James Cc: Signed-off-by: Andrew Morton --- include/linux/license.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/license.h b/include/linux/license.h index 7cce390f120b..ad937f57f2cb 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,6 +2,8 @@ #ifndef __LICENSE_H #define __LICENSE_H +#include + static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 From 44af0b45d58d7b6f09ebb9081aa89b8bdc33a630 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 11 Nov 2022 11:51:35 +1100 Subject: [PATCH 611/963] mm/migrate_device: return number of migrating pages in args->cpages migrate_vma->cpages originally contained a count of the number of pages migrating including non-present pages which can be populated directly on the target. Commit 241f68859656 ("mm/migrate_device.c: refactor migrate_vma and migrate_device_coherent_page()") inadvertantly changed this to contain just the number of pages that were unmapped. Usage of migrate_vma->cpages isn't documented, but most drivers use it to see if all the requested addresses can be migrated so restore the original behaviour. Link: https://lkml.kernel.org/r/20221111005135.1344004-1-apopple@nvidia.com Fixes: 241f68859656 ("mm/migrate_device.c: refactor migrate_vma and migrate_deivce_coherent_page()") Signed-off-by: Alistair Popple Reported-by: Ralph Campbell Reviewed-by: Ralph Campbell Cc: John Hubbard Cc: Alex Sierra Cc: Ben Skeggs Cc: Felix Kuehling Cc: Lyude Paul Cc: Jason Gunthorpe Cc: Michael Ellerman Signed-off-by: Andrew Morton --- mm/migrate_device.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 6fa682eef7a0..721b2365dbca 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -357,7 +357,8 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page) } /* - * Unmaps pages for migration. Returns number of unmapped pages. + * Unmaps pages for migration. Returns number of source pfns marked as + * migrating. */ static unsigned long migrate_device_unmap(unsigned long *src_pfns, unsigned long npages, @@ -373,8 +374,11 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns, struct page *page = migrate_pfn_to_page(src_pfns[i]); struct folio *folio; - if (!page) + if (!page) { + if (src_pfns[i] & MIGRATE_PFN_MIGRATE) + unmapped++; continue; + } /* ZONE_DEVICE pages are not on LRU */ if (!is_zone_device_page(page)) { From 47123d7fdffff7389f8ffa833110784ab9fc8bc6 Mon Sep 17 00:00:00 2001 From: Satya Priya Date: Wed, 16 Nov 2022 16:20:17 +0530 Subject: [PATCH 612/963] mailmap: update email address for Satya Priya Add and also update email address, skakit@codeaurora.org is no longer active. Link: https://lkml.kernel.org/r/20221116105017.3018971-1-quic_c_skakit@quicinc.com Signed-off-by: Satya Priya Cc: Konrad Dybcio Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index f9e4472b891a..aec51726551f 100644 --- a/.mailmap +++ b/.mailmap @@ -383,6 +383,7 @@ Santosh Shilimkar Santosh Shilimkar Sarangdhar Joshi Sascha Hauer +Satya Priya S.Çağlar Onur Sean Christopherson Sean Nyekjaer From 359a5e1416caaf9ce28396a65ed3e386cc5de663 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Tue, 15 Nov 2022 18:38:07 -0700 Subject: [PATCH 613/963] mm: multi-gen LRU: retry folios written back while isolated The page reclaim isolates a batch of folios from the tail of one of the LRU lists and works on those folios one by one. For a suitable swap-backed folio, if the swap device is async, it queues that folio for writeback. After the page reclaim finishes an entire batch, it puts back the folios it queued for writeback to the head of the original LRU list. In the meantime, the page writeback flushes the queued folios also by batches. Its batching logic is independent from that of the page reclaim. For each of the folios it writes back, the page writeback calls folio_rotate_reclaimable() which tries to rotate a folio to the tail. folio_rotate_reclaimable() only works for a folio after the page reclaim has put it back. If an async swap device is fast enough, the page writeback can finish with that folio while the page reclaim is still working on the rest of the batch containing it. In this case, that folio will remain at the head and the page reclaim will not retry it before reaching there. This patch adds a retry to evict_folios(). After evict_folios() has finished an entire batch and before it puts back folios it cannot free immediately, it retries those that may have missed the rotation. Before this patch, ~60% of folios swapped to an Intel Optane missed folio_rotate_reclaimable(). After this patch, ~99% of missed folios were reclaimed upon retry. This problem affects relatively slow async swap devices like Samsung 980 Pro much less and does not affect sync swap devices like zram or zswap at all. Link: https://lkml.kernel.org/r/20221116013808.3995280-1-yuzhao@google.com Fixes: ac35a4902374 ("mm: multi-gen LRU: minimal implementation") Signed-off-by: Yu Zhao Cc: "Yin, Fengwei" Signed-off-by: Andrew Morton --- mm/vmscan.c | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0317d4cf4884..e36666679e1c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4971,10 +4971,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap int scanned; int reclaimed; LIST_HEAD(list); + LIST_HEAD(clean); struct folio *folio; + struct folio *next; enum vm_event_item item; struct reclaim_stat stat; struct lru_gen_mm_walk *walk; + bool skip_retry = false; struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); @@ -4991,20 +4994,37 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap if (list_empty(&list)) return scanned; - +retry: reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false); + sc->nr_reclaimed += reclaimed; - list_for_each_entry(folio, &list, lru) { - /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ - if (folio_test_workingset(folio)) - folio_set_referenced(folio); + list_for_each_entry_safe_reverse(folio, next, &list, lru) { + if (!folio_evictable(folio)) { + list_del(&folio->lru); + folio_putback_lru(folio); + continue; + } - /* don't add rejected pages to the oldest generation */ if (folio_test_reclaim(folio) && - (folio_test_dirty(folio) || folio_test_writeback(folio))) - folio_clear_active(folio); - else - folio_set_active(folio); + (folio_test_dirty(folio) || folio_test_writeback(folio))) { + /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ + if (folio_test_workingset(folio)) + folio_set_referenced(folio); + continue; + } + + if (skip_retry || folio_test_active(folio) || folio_test_referenced(folio) || + folio_mapped(folio) || folio_test_locked(folio) || + folio_test_dirty(folio) || folio_test_writeback(folio)) { + /* don't add rejected folios to the oldest generation */ + set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, + BIT(PG_active)); + continue; + } + + /* retry folios that may have missed folio_rotate_reclaimable() */ + list_move(&folio->lru, &clean); + sc->nr_scanned -= folio_nr_pages(folio); } spin_lock_irq(&lruvec->lru_lock); @@ -5026,7 +5046,13 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap mem_cgroup_uncharge_list(&list); free_unref_page_list(&list); - sc->nr_reclaimed += reclaimed; + INIT_LIST_HEAD(&list); + list_splice_init(&clean, &list); + + if (!list_empty(&list)) { + skip_retry = true; + goto retry; + } if (need_swapping && type == LRU_GEN_ANON) *need_swapping = true; From f850c84948ef2d4f5e11fd8e528c2ac3b3c3d9c4 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Thu, 17 Nov 2022 04:32:47 +0000 Subject: [PATCH 614/963] proc/meminfo: fix spacing in SecPageTables SecPageTables has a tab after it instead of a space, this can break fragile parsers that depend on spaces after the stat names. Link: https://lkml.kernel.org/r/20221117043247.133294-1-yosryahmed@google.com Fixes: ebc97a52b5d6cd5f ("mm: add NR_SECONDARY_PAGETABLE to count secondary page table uses.") Signed-off-by: Yosry Ahmed Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: David Hildenbrand Cc: Marc Zyngier Cc: Sean Christopherson Signed-off-by: Andrew Morton --- fs/proc/meminfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5101131e6047..440960110a42 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -115,7 +115,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif show_val_kb(m, "PageTables: ", global_node_page_state(NR_PAGETABLE)); - show_val_kb(m, "SecPageTables: ", + show_val_kb(m, "SecPageTables: ", global_node_page_state(NR_SECONDARY_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); From 747c0f35f2d55de20093e992bd9fc7292193c0e6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 18 Nov 2022 16:22:16 +0100 Subject: [PATCH 615/963] kfence: fix stack trace pruning Commit b14051352465 ("mm/sl[au]b: generalize kmalloc subsystem") refactored large parts of the kmalloc subsystem, resulting in the stack trace pruning logic done by KFENCE to no longer work. While b14051352465 attempted to fix the situation by including '__kmem_cache_free' in the list of functions KFENCE should skip through, this only works when the compiler actually optimized the tail call from kfree() to __kmem_cache_free() into a jump (and thus kfree() _not_ appearing in the full stack trace to begin with). In some configurations, the compiler no longer optimizes the tail call into a jump, and __kmem_cache_free() appears in the stack trace. This means that the pruned stack trace shown by KFENCE would include kfree() which is not intended - for example: | BUG: KFENCE: invalid free in kfree+0x7c/0x120 | | Invalid free of 0xffff8883ed8fefe0 (in kfence-#126): | kfree+0x7c/0x120 | test_double_free+0x116/0x1a9 | kunit_try_run_case+0x90/0xd0 | [...] Fix it by moving __kmem_cache_free() to the list of functions that may be tail called by an allocator entry function, making the pruning logic work in both the optimized and unoptimized tail call cases. Link: https://lkml.kernel.org/r/20221118152216.3914899-1-elver@google.com Fixes: b14051352465 ("mm/sl[au]b: generalize kmalloc subsystem") Signed-off-by: Marco Elver Reviewed-by: Alexander Potapenko Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Feng Tang Signed-off-by: Andrew Morton --- mm/kfence/report.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 7e496856c2eb..46ecea18c4ca 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -75,18 +75,23 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") || + str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) { /* - * In case of tail calls from any of the below - * to any of the above. + * In case of tail calls from any of the below to any of + * the above, optimized by the compiler such that the + * stack trace would omit the initial entry point below. */ fallback = skipnr + 1; } - /* Also the *_bulk() variants by only checking prefixes. */ + /* + * The below list should only include the initial entry points + * into the slab allocators. Includes the *_bulk() variants by + * checking prefixes. + */ if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") || - str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmem_cache_free") || str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") || str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc")) goto found; From 7fb0728a9b005b8fc55e835529047cca15191031 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 18 Nov 2022 11:52:49 -0800 Subject: [PATCH 616/963] hugetlb: fix __prep_compound_gigantic_page page flag setting Commit 2b21624fc232 ("hugetlb: freeze allocated pages before creating hugetlb pages") changed the order page flags were cleared and set in the head page. It moved the __ClearPageReserved after __SetPageHead. However, there is a check to make sure __ClearPageReserved is never done on a head page. If CONFIG_DEBUG_VM_PGFLAGS is enabled, the following BUG will be hit when creating a hugetlb gigantic page: page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page)) ------------[ cut here ]------------ kernel BUG at include/linux/page-flags.h:500! Call Trace will differ depending on whether hugetlb page is created at boot time or run time. Make sure to __ClearPageReserved BEFORE __SetPageHead. Link: https://lkml.kernel.org/r/20221118195249.178319-1-mike.kravetz@oracle.com Fixes: 2b21624fc232 ("hugetlb: freeze allocated pages before creating hugetlb pages") Signed-off-by: Mike Kravetz Reported-by: Aneesh Kumar K.V Acked-by: Muchun Song Tested-by: Tarun Sahu Reviewed-by: Miaohe Lin Cc: Joao Martins Cc: Matthew Wilcox Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Peter Xu Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- mm/hugetlb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e48f8ef45b17..f1385c3b6c96 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1800,6 +1800,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, /* we rely on prep_new_huge_page to set the destructor */ set_compound_order(page, order); + __ClearPageReserved(page); __SetPageHead(page); for (i = 0; i < nr_pages; i++) { p = nth_page(page, i); @@ -1816,7 +1817,8 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order, * on the head page when they need know if put_page() is needed * after get_user_pages(). */ - __ClearPageReserved(p); + if (i != 0) /* head page cleared above */ + __ClearPageReserved(p); /* * Subtle and very unlikely * From de1ccfb648243a031cfbdc2d5571dfdaf5023106 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Fri, 18 Nov 2022 21:38:50 +0800 Subject: [PATCH 617/963] swapfile: fix soft lockup in scan_swap_map_slots A softlockup occurs in scan free swap slot under huge memory pressure. The test scenario is: 64 CPU cores, 64GB memory, and 28 zram devices, the disksize of each zram device is 50MB. LATENCY_LIMIT is used to prevent softlockups in scan_swap_map_slots(), but the real loop number would more than LATENCY_LIMIT because of "goto checks and goto scan" repeatly without decreasing latency limit. In order to fix it, decrease latency_ration in advance. There is also a suspicious place that will cause softlockups in get_swap_pages(). In this function, the "goto start_over" may result in continuous scanning of the swap partition. If there is no cond_sched in scan_swap_map_slots(), it would cause a softlockup (I am not sure about this). WARN: soft lockup - CPU#11 stuck for 11s! [kswapd0:466] CPU: 11 PID: 466 Comm: kswapd@ Kdump: loaded Tainted: G dump backtrace+0x0/0x1le4 show stack+0x20/@x2c dump_stack+0xd8/0x140 watchdog print_info+0x48/0x54 watchdog_process_before_softlockup+0x98/0xa0 watchdog_timer_fn+0xlac/0x2d0 hrtimer_rum_queues+0xb0/0x130 hrtimer_interrupt+0x13c/0x3c0 arch_timer_handler_virt+0x3c/0x50 handLe_percpu_devid_irq+0x90/0x1f4 handle domain irq+0x84/0x100 gic_handle_irq+0x88/0x2b0 e11 ira+0xhB/Bx140 scan_swap_map_slots+0x678/0x890 get_swap_pages+0x29c/0x440 get_swap_page+0x120/0x2e0 add_to_swap+UX2U/0XyC shrink_page_list+0x5d0/0x152c shrink_inactive_list+0xl6c/Bx500 shrink_lruvec+0x270/0x304 WARN: soft lockup - CPU#32 stuck for 11s! [stress-ng:309915] watchdog_timer_fn+0x1ac/0x2d0 __run_hrtimer+0x98/0x2a0 __hrtimer_run_queues+0xb0/0x130 hrtimer_interrupt+0x13c/0x3c0 arch_timer_handler_virt+0x3c/0x50 handle_percpu_devid_irq+0x90/0x1f4 __handle_domain_irq+0x84/0x100 gic_handle_irq+0x88/0x2b0 el1_irq+0xb8/0x140 get_swap_pages+0x1e8/0x440 get_swap_page+0x1c8/0x2e0 add_to_swap+0x20/0x9c shrink_page_list+0x5d0/0x152c reclaim_pages+0x160/0x310 madvise_cold_or_pageout_pte_range+0x7bc/0xe3c walk_pmd_range.isra.0+0xac/0x22c walk_pud_range+0xfc/0x1c0 walk_pgd_range+0x158/0x1b0 __walk_page_range+0x64/0x100 walk_page_range+0x104/0x150 Link: https://lkml.kernel.org/r/20221118133850.3360369-1-chenwandun@huawei.com Fixes: 048c27fd7281 ("[PATCH] swap: scan_swap_map latency breaks") Signed-off-by: Chen Wandun Reviewed-by: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Nanyong Sun Cc: Signed-off-by: Andrew Morton --- mm/swapfile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 5fc1237a9f21..72e481aacd5d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -973,23 +973,23 @@ static int scan_swap_map_slots(struct swap_info_struct *si, scan: spin_unlock(&si->lock); while (++offset <= READ_ONCE(si->highest_bit)) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; } offset = si->lowest_bit; while (offset < scan_base) { - if (swap_offset_available_and_locked(si, offset)) - goto checks; if (unlikely(--latency_ration < 0)) { cond_resched(); latency_ration = LATENCY_LIMIT; scanned_many = true; } + if (swap_offset_available_and_locked(si, offset)) + goto checks; offset++; } spin_lock(&si->lock); From ea4452de2ae987342fadbdd2c044034e6480daad Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Fri, 18 Nov 2022 18:00:11 +0800 Subject: [PATCH 618/963] mm: fix unexpected changes to {failslab|fail_page_alloc}.attr When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it. [akpm@linux-foundation.org: unexport should_fail_ex()] Link: https://lkml.kernel.org/r/20221118100011.2634-1-zhengqi.arch@bytedance.com Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Signed-off-by: Qi Zheng Reported-by: Dmitry Vyukov Reviewed-by: Akinobu Mita Reviewed-by: Jason Gunthorpe Cc: Akinobu Mita Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton --- include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 13 ++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 96e092de5b72..adb2f9355ee6 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); @@ -146,13 +143,19 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 218b28ee49ed..6e60657875d3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3887,6 +3887,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3897,10 +3899,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS From 81a70c21d9170de67a45843bdd627f4cce9c4215 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 18 Nov 2022 12:36:03 +0530 Subject: [PATCH 619/963] mm/cgroup/reclaim: fix dirty pages throttling on cgroup v1 balance_dirty_pages doesn't do the required dirty throttling on cgroupv1. See commit 9badce000e2c ("cgroup, writeback: don't enable cgroup writeback on traditional hierarchies"). Instead, the kernel depends on writeback throttling in shrink_folio_list to achieve the same goal. With large memory systems, the flusher may not be able to writeback quickly enough such that we will start finding pages in the shrink_folio_list already in writeback. Hence for cgroupv1 let's do a reclaim throttle after waking up the flusher. The below test which used to fail on a 256GB system completes till the the file system is full with this change. root@lp2:/sys/fs/cgroup/memory# mkdir test root@lp2:/sys/fs/cgroup/memory# cd test/ root@lp2:/sys/fs/cgroup/memory/test# echo 120M > memory.limit_in_bytes root@lp2:/sys/fs/cgroup/memory/test# echo $$ > tasks root@lp2:/sys/fs/cgroup/memory/test# dd if=/dev/zero of=/home/kvaneesh/test bs=1M Killed Link: https://lkml.kernel.org/r/20221118070603.84081-1-aneesh.kumar@linux.ibm.com Signed-off-by: Aneesh Kumar K.V Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Cc: Tejun Heo Cc: zefan li Cc: Signed-off-by: Andrew Morton --- mm/vmscan.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index e36666679e1c..026199c047e0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2514,8 +2514,20 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan, * the flushers simply cannot keep up with the allocation * rate. Nudge the flusher threads in case they are asleep. */ - if (stat.nr_unqueued_dirty == nr_taken) + if (stat.nr_unqueued_dirty == nr_taken) { wakeup_flusher_threads(WB_REASON_VMSCAN); + /* + * For cgroupv1 dirty throttling is achieved by waking up + * the kernel flusher here and later waiting on folios + * which are in writeback to finish (see shrink_folio_list()). + * + * Flusher may not be able to issue writeback quickly + * enough for cgroupv1 writeback throttling to work + * on a large system. + */ + if (!writeback_throttling_sane(sc)) + reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK); + } sc->nr.dirty += stat.nr_dirty; sc->nr.congested += stat.nr_congested; From 512c5ca01a3610ab14ff6309db363de51f1c13a6 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 18 Nov 2022 14:33:04 +0800 Subject: [PATCH 620/963] nilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty When extending segments, nilfs_sufile_alloc() is called to get an unassigned segment, then mark it as dirty to avoid accidentally allocating the same segment in the future. But for some special cases such as a corrupted image it can be unreliable. If such corruption of the dirty state of the segment occurs, nilfs2 may reallocate a segment that is in use and pick the same segment for writing twice at the same time. This will cause the problem reported by syzkaller: https://syzkaller.appspot.com/bug?id=c7c4748e11ffcc367cef04f76e02e931833cbd24 This case started with segbuf1.segnum = 3, nextnum = 4 when constructed. It supposed segment 4 has already been allocated and marked as dirty. However the dirty state was corrupted and segment 4 usage was not dirty. For the first time nilfs_segctor_extend_segments() segment 4 was allocated again, which made segbuf2 and next segbuf3 had same segment 4. sb_getblk() will get same bh for segbuf2 and segbuf3, and this bh is added to both buffer lists of two segbuf. It makes the lists broken which causes NULL pointer dereference. Fix the problem by setting usage as dirty every time in nilfs_sufile_mark_dirty(), which is called during constructing current segment to be written out and before allocating next segment. [chenzhongjin@huawei.com: add lock protection per Ryusuke] Link: https://lkml.kernel.org/r/20221121091141.214703-1-chenzhongjin@huawei.com Link: https://lkml.kernel.org/r/20221118063304.140187-1-chenzhongjin@huawei.com Fixes: 9ff05123e3bf ("nilfs2: segment constructor") Signed-off-by: Chen Zhongjin Reported-by: Reported-by: Liu Shixin Acked-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 77ff8e95421f..dc359b56fdfa 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -495,14 +495,22 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) { struct buffer_head *bh; + void *kaddr; + struct nilfs_segment_usage *su; int ret; + down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); if (!ret) { mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); + kaddr = kmap_atomic(bh->b_page); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + nilfs_segment_usage_set_dirty(su); + kunmap_atomic(kaddr); brelse(bh); } + up_write(&NILFS_MDT(sufile)->mi_sem); return ret; } From de3db3f883a82c4800f4af0ae2cc3b96a408ee9b Mon Sep 17 00:00:00 2001 From: Li Hua Date: Mon, 21 Nov 2022 11:06:20 +0800 Subject: [PATCH 621/963] test_kprobes: fix implicit declaration error of test_kprobes If KPROBES_SANITY_TEST and ARCH_CORRECT_STACKTRACE_ON_KRETPROBE is enabled, but STACKTRACE is not set. Build failed as below: lib/test_kprobes.c: In function `stacktrace_return_handler': lib/test_kprobes.c:228:8: error: implicit declaration of function `stack_trace_save'; did you mean `stacktrace_driver'? [-Werror=implicit-function-declaration] ret = stack_trace_save(stack_buf, STACK_BUF_SIZE, 0); ^~~~~~~~~~~~~~~~ stacktrace_driver cc1: all warnings being treated as errors scripts/Makefile.build:250: recipe for target 'lib/test_kprobes.o' failed make[2]: *** [lib/test_kprobes.o] Error 1 To fix this error, Select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE is enabled. Link: https://lkml.kernel.org/r/20221121030620.63181-1-hucool.lihua@huawei.com Fixes: 1f6d3a8f5e39 ("kprobes: Add a test case for stacktrace from kretprobe handler") Signed-off-by: Li Hua Acked-by: Masami Hiramatsu (Google) Cc: Steven Rostedt (VMware) Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c3c0b077ade3..a1005415f0f4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2107,6 +2107,7 @@ config KPROBES_SANITY_TEST depends on DEBUG_KERNEL depends on KPROBES depends on KUNIT + select STACKTRACE if ARCH_CORRECT_STACKTRACE_ON_KRETPROBE default KUNIT_ALL_TESTS help This option provides for testing basic kprobes functionality on From 77934dc6db0d2b111a8f2759e9ad2fb67f5cffa5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 18 Nov 2022 17:49:11 -0800 Subject: [PATCH 622/963] dccp/tcp: Reset saddr on failure after inet6?_hash_connect(). When connect() is called on a socket bound to the wildcard address, we change the socket's saddr to a local address. If the socket fails to connect() to the destination, we have to reset the saddr. However, when an error occurs after inet_hash6?_connect() in (dccp|tcp)_v[46]_conect(), we forget to reset saddr and leave the socket bound to the address. From the user's point of view, whether saddr is reset or not varies with errno. Let's fix this inconsistent behaviour. Note that after this patch, the repro [0] will trigger the WARN_ON() in inet_csk_get_port() again, but this patch is not buggy and rather fixes a bug papering over the bhash2's bug for which we need another fix. For the record, the repro causes -EADDRNOTAVAIL in inet_hash6_connect() by this sequence: s1 = socket() s1.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) s1.bind(('127.0.0.1', 10000)) s1.sendto(b'hello', MSG_FASTOPEN, (('127.0.0.1', 10000))) # or s1.connect(('127.0.0.1', 10000)) s2 = socket() s2.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1) s2.bind(('0.0.0.0', 10000)) s2.connect(('127.0.0.1', 10000)) # -EADDRNOTAVAIL s2.listen(32) # WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); [0]: https://syzkaller.appspot.com/bug?extid=015d756bbd1f8b5c8f09 Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 7c657876b63c ("[DCCP]: Initial implementation") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/dccp/ipv4.c | 2 ++ net/dccp/ipv6.c | 2 ++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/tcp_ipv6.c | 2 ++ 4 files changed, 8 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 713b7b8dad7e..40640c26680e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -157,6 +157,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index e57b43006074..626166cb6d7e 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -985,6 +985,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 87d440f47a70..6a3a732b584d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -343,6 +343,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * if necessary. */ tcp_set_state(sk, TCP_CLOSE); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2a3f9296df1e..81b396e5cf79 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -359,6 +359,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; From 8acdad37cd13ce777b0811b2d332314037fcefa8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 18 Nov 2022 17:49:12 -0800 Subject: [PATCH 623/963] dccp/tcp: Remove NULL check for prev_saddr in inet_bhash2_update_saddr(). When we call inet_bhash2_update_saddr(), prev_saddr is always non-NULL. Let's remove the unnecessary test. Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- net/ipv4/inet_hashtables.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 033bf3c2538f..d745f962745e 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -877,13 +877,10 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); - if (prev_saddr) { - spin_lock_bh(&prev_saddr->lock); - __sk_del_bind2_node(sk); - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, - inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); - } + spin_lock_bh(&prev_saddr->lock); + __sk_del_bind2_node(sk); + inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); + spin_unlock_bh(&prev_saddr->lock); spin_lock_bh(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); From 8c5dae4c1a49489499e6708c7dd284370ca36287 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 18 Nov 2022 17:49:13 -0800 Subject: [PATCH 624/963] dccp/tcp: Update saddr under bhash's lock. When we call connect() for a socket bound to a wildcard address, we update saddr locklessly. However, it could result in a data race; another thread iterating over bhash might see a corrupted address. Let's update saddr under the bhash bucket's lock. Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Fixes: 7c657876b63c ("[DCCP]: Initial implementation") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 2 +- net/dccp/ipv4.c | 22 ++++------------- net/dccp/ipv6.c | 21 +++------------- net/ipv4/af_inet.c | 11 +-------- net/ipv4/inet_hashtables.c | 45 ++++++++++++++++++++++++++++++----- net/ipv4/tcp_ipv4.c | 20 ++++------------ net/ipv6/tcp_ipv6.c | 19 +++------------ 7 files changed, 55 insertions(+), 85 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3af1e927247d..ba06e8b52264 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -281,7 +281,7 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's * rcv_saddr field should already have been updated when this is called. */ -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk); +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 40640c26680e..95e376e3b911 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -45,11 +45,10 @@ static unsigned int dccp_v4_pernet_id __read_mostly; int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -91,26 +90,13 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) daddr = fl4->daddr; if (inet->inet_saddr == 0) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = - inet_bhashfn_portaddr(&dccp_hashinfo, sk, - sock_net(sk), - inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } inet->inet_dport = usin->sin_port; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 626166cb6d7e..94c101ed57a9 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -934,26 +934,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, } if (saddr == NULL) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&dccp_hashinfo, - sk, sock_net(sk), - inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } - saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4728087c42a5..0da679411330 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1230,7 +1230,6 @@ EXPORT_SYMBOL(inet_unregister_protosw); static int inet_sk_reselect_saddr(struct sock *sk) { - struct inet_bind_hashbucket *prev_addr_hashbucket; struct inet_sock *inet = inet_sk(sk); __be32 old_saddr = inet->inet_saddr; __be32 daddr = inet->inet_daddr; @@ -1260,16 +1259,8 @@ static int inet_sk_reselect_saddr(struct sock *sk) return 0; } - prev_addr_hashbucket = - inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, - sock_net(sk), inet->inet_num); - - inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; - - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &new_saddr, AF_INET); if (err) { - inet->inet_saddr = old_saddr; - inet->inet_rcv_saddr = old_saddr; ip_rt_put(rt); return err; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d745f962745e..18ef370af113 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -858,14 +858,34 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } -int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) +static void inet_update_saddr(struct sock *sk, void *saddr, int family) +{ + if (family == AF_INET) { + inet_sk(sk)->inet_saddr = *(__be32 *)saddr; + sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else { + sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr; + } +#endif +} + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); - struct inet_bind_hashbucket *head2; int port = inet_sk(sk)->inet_num; struct net *net = sock_net(sk); + int bhash; + + if (!inet_csk(sk)->icsk_bind2_hash) { + /* Not bind()ed before. */ + inet_update_saddr(sk, saddr, family); + return 0; + } /* Allocate a bind2 bucket ahead of time to avoid permanently putting * the bhash2 table in an inconsistent state if a new tb2 bucket @@ -875,14 +895,25 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc if (!new_tb2) return -ENOMEM; + bhash = inet_bhashfn(net, port, hinfo->bhash_size); + head = &hinfo->bhash[bhash]; head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); - spin_lock_bh(&prev_saddr->lock); + /* If we change saddr locklessly, another thread + * iterating over bhash might see corrupted address. + */ + spin_lock_bh(&head->lock); + + spin_lock(&head2->lock); __sk_del_bind2_node(sk); inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); - spin_unlock_bh(&prev_saddr->lock); + spin_unlock(&head2->lock); - spin_lock_bh(&head2->lock); + inet_update_saddr(sk, saddr, family); + + head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); + + spin_lock(&head2->lock); tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk); if (!tb2) { tb2 = new_tb2; @@ -890,7 +921,9 @@ int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct soc } sk_add_bind2_node(sk, &tb2->owners); inet_csk(sk)->icsk_bind2_hash = tb2; - spin_unlock_bh(&head2->lock); + spin_unlock(&head2->lock); + + spin_unlock_bh(&head->lock); if (tb2 != new_tb2) kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6a3a732b584d..23dd7e9df2d5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -199,15 +199,14 @@ static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_timewait_death_row *tcp_death_row; - __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct ip_options_rcu *inet_opt; struct net *net = sock_net(sk); __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; struct flowi4 *fl4; struct rtable *rt; int err; @@ -251,24 +250,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!inet->inet_saddr) { - if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_sk_rcv_saddr = sk->sk_rcv_saddr; - } - inet->inet_saddr = fl4->saddr; - } - - sk_rcv_saddr_set(sk, inet->inet_saddr); - - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); + err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET); if (err) { - inet->inet_saddr = 0; - sk_rcv_saddr_set(sk, prev_sk_rcv_saddr); ip_rt_put(rt); return err; } + } else { + sk_rcv_saddr_set(sk, inet->inet_saddr); } if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 81b396e5cf79..2f3ca3190d26 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -292,24 +292,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { - struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; - struct in6_addr prev_v6_rcv_saddr; - - if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, - sk, net, inet->inet_num); - prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - } saddr = &fl6.saddr; - sk->sk_v6_rcv_saddr = *saddr; - if (prev_addr_hashbucket) { - err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk); - if (err) { - sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr; - goto failure; - } - } + err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); + if (err) + goto failure; } /* set the source address */ From e0833d1fedb02f038b526ae7dde178a076f56545 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 18 Nov 2022 17:49:14 -0800 Subject: [PATCH 625/963] dccp/tcp: Fixup bhash2 bucket when connect() fails. If a socket bound to a wildcard address fails to connect(), we only reset saddr and keep the port. Then, we have to fix up the bhash2 bucket; otherwise, the bucket has an inconsistent address in the list. Also, listen() for such a socket will fire the WARN_ON() in inet_csk_get_port(). [0] Note that when a system runs out of memory, we give up fixing the bucket and unlink sk from bhash and bhash2 by inet_put_port(). [0]: WARNING: CPU: 0 PID: 207 at net/ipv4/inet_connection_sock.c:548 inet_csk_get_port (net/ipv4/inet_connection_sock.c:548 (discriminator 1)) Modules linked in: CPU: 0 PID: 207 Comm: bhash2_prev_rep Not tainted 6.1.0-rc3-00799-gc8421681c845 #63 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.amzn2022.0.1 04/01/2014 RIP: 0010:inet_csk_get_port (net/ipv4/inet_connection_sock.c:548 (discriminator 1)) Code: 74 a7 eb 93 48 8b 54 24 18 0f b7 cb 4c 89 e6 4c 89 ff e8 48 b2 ff ff 49 8b 87 18 04 00 00 e9 32 ff ff ff 0f 0b e9 34 ff ff ff <0f> 0b e9 42 ff ff ff 41 8b 7f 50 41 8b 4f 54 89 fe 81 f6 00 00 ff RSP: 0018:ffffc900003d7e50 EFLAGS: 00010202 RAX: ffff8881047fb500 RBX: 0000000000004e20 RCX: 0000000000000000 RDX: 000000000000000a RSI: 00000000fffffe00 RDI: 00000000ffffffff RBP: ffffffff8324dc00 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000004e20 R15: ffff8881054e1280 FS: 00007f8ac04dc740(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020001540 CR3: 00000001055fa003 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: inet_csk_listen_start (net/ipv4/inet_connection_sock.c:1205) inet_listen (net/ipv4/af_inet.c:228) __sys_listen (net/socket.c:1810) __x64_sys_listen (net/socket.c:1819 net/socket.c:1817 net/socket.c:1817) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7f8ac051de5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 93 af 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc1c177248 EFLAGS: 00000206 ORIG_RAX: 0000000000000032 RAX: ffffffffffffffda RBX: 0000000020001550 RCX: 00007f8ac051de5d RDX: ffffffffffffff80 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00007ffc1c177270 R08: 0000000000000018 R09: 0000000000000007 R10: 0000000020001540 R11: 0000000000000206 R12: 00007ffc1c177388 R13: 0000000000401169 R14: 0000000000403e18 R15: 00007f8ac0723000 Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: syzbot Reported-by: Mat Martineau Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 1 + net/dccp/ipv4.c | 3 +-- net/dccp/ipv6.c | 3 +-- net/dccp/proto.c | 3 +-- net/ipv4/inet_hashtables.c | 38 +++++++++++++++++++++++++++++++---- net/ipv4/tcp.c | 3 +-- net/ipv4/tcp_ipv4.c | 3 +-- net/ipv6/tcp_ipv6.c | 3 +-- 8 files changed, 41 insertions(+), 16 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ba06e8b52264..69174093078f 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -282,6 +282,7 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in * rcv_saddr field should already have been updated when this is called. */ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); +void inet_bhash2_reset_saddr(struct sock *sk); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 95e376e3b911..b780827f5e0a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -143,8 +143,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 94c101ed57a9..602f3432d80b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,8 +970,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: dccp_set_state(sk, DCCP_CLOSED); - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index c548ca3e9b0e..85e35c5e8890 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -279,8 +279,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 18ef370af113..3cec471a2cd2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -871,7 +871,7 @@ static void inet_update_saddr(struct sock *sk, void *saddr, int family) #endif } -int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset) { struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind_hashbucket *head, *head2; @@ -883,7 +883,11 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) if (!inet_csk(sk)->icsk_bind2_hash) { /* Not bind()ed before. */ - inet_update_saddr(sk, saddr, family); + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); + return 0; } @@ -892,8 +896,19 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) * allocation fails. */ new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC); - if (!new_tb2) + if (!new_tb2) { + if (reset) { + /* The (INADDR_ANY, port) bucket might have already + * been freed, then we cannot fixup icsk_bind2_hash, + * so we give up and unlink sk from bhash/bhash2 not + * to leave inconsistency in bhash2. + */ + inet_put_port(sk); + inet_reset_saddr(sk); + } + return -ENOMEM; + } bhash = inet_bhashfn(net, port, hinfo->bhash_size); head = &hinfo->bhash[bhash]; @@ -909,7 +924,10 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash); spin_unlock(&head2->lock); - inet_update_saddr(sk, saddr, family); + if (reset) + inet_reset_saddr(sk); + else + inet_update_saddr(sk, saddr, family); head2 = inet_bhashfn_portaddr(hinfo, sk, net, port); @@ -930,8 +948,20 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) return 0; } + +int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) +{ + return __inet_bhash2_update_saddr(sk, saddr, family, false); +} EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr); +void inet_bhash2_reset_saddr(struct sock *sk) +{ + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + __inet_bhash2_update_saddr(sk, NULL, 0, true); +} +EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr); + /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 54836a6b81d6..4f2205756cfe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3114,8 +3114,7 @@ int tcp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23dd7e9df2d5..da46357f501b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -331,8 +331,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * if necessary. */ tcp_set_state(sk, TCP_CLOSE); - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); ip_rt_put(rt); sk->sk_route_caps = 0; inet->inet_dport = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2f3ca3190d26..f0548dbcabd2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -346,8 +346,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) - inet_reset_saddr(sk); + inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; From 3213f808ae21be3891885de2f3a775afafcda987 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 20 Nov 2022 11:54:05 +0800 Subject: [PATCH 626/963] net: ethernet: mtk_eth_soc: fix potential memory leak in mtk_rx_alloc() When fail to dma_map_single() in mtk_rx_alloc(), it returns directly. But the memory allocated for local variable data is not freed, and local variabel data has not been attached to ring->data[i] yet, so the memory allocated for local variable data will not be freed outside mtk_rx_alloc() too. Thus memory leak would occur in this scenario. Add skb_free_frag(data) when dma_map_single() failed. Fixes: 23233e577ef9 ("net: ethernet: mtk_eth_soc: rely on page_pool for single page buffers") Signed-off-by: Ziyang Xuan Acked-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/20221120035405.1464341-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1d1f2342e3ec..bbffd92089bf 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -2378,8 +2378,10 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) data + NET_SKB_PAD + eth->ip_align, ring->buf_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(eth->dma_dev, - dma_addr))) + dma_addr))) { + skb_free_frag(data); return -ENOMEM; + } } rxd->rxd1 = (unsigned int)dma_addr; ring->data[i] = data; From 8110437e59616293228cd781c486d8495a61e36a Mon Sep 17 00:00:00 2001 From: Yan Cangang Date: Sun, 20 Nov 2022 13:52:58 +0800 Subject: [PATCH 627/963] net: ethernet: mtk_eth_soc: fix resource leak in error path In mtk_probe(), when mtk_ppe_init() or mtk_eth_offload_init() failed, mtk_mdio_cleanup() isn't called. Fix it. Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") Fixes: 502e84e2382d ("net: ethernet: mtk_eth_soc: add flow offloading support") Signed-off-by: Yan Cangang Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bbffd92089bf..ae073b431738 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4147,13 +4147,13 @@ static int mtk_probe(struct platform_device *pdev) eth->soc->offload_version, i); if (!eth->ppe[i]) { err = -ENOMEM; - goto err_free_dev; + goto err_deinit_mdio; } } err = mtk_eth_offload_init(eth); if (err) - goto err_free_dev; + goto err_deinit_mdio; } for (i = 0; i < MTK_MAX_DEVS; i++) { From 603ea5e7ffa73c7fac07d8713d97285990695213 Mon Sep 17 00:00:00 2001 From: Yan Cangang Date: Sun, 20 Nov 2022 13:52:59 +0800 Subject: [PATCH 628/963] net: ethernet: mtk_eth_soc: fix memory leak in error path In mtk_ppe_init(), when dmam_alloc_coherent() or devm_kzalloc() failed, the rhashtable ppe->l2_flows isn't destroyed. Fix it. In mtk_probe(), when mtk_ppe_init() or mtk_eth_offload_init() or register_netdev() failed, have the same problem. Fix it. Fixes: 33fc42de3327 ("net: ethernet: mtk_eth_soc: support creating mac address based offload entries") Signed-off-by: Yan Cangang Reviewed-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 9 +++++---- drivers/net/ethernet/mediatek/mtk_ppe.c | 19 +++++++++++++++++-- drivers/net/ethernet/mediatek/mtk_ppe.h | 1 + 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index ae073b431738..1d36619c5ec9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4147,13 +4147,13 @@ static int mtk_probe(struct platform_device *pdev) eth->soc->offload_version, i); if (!eth->ppe[i]) { err = -ENOMEM; - goto err_deinit_mdio; + goto err_deinit_ppe; } } err = mtk_eth_offload_init(eth); if (err) - goto err_deinit_mdio; + goto err_deinit_ppe; } for (i = 0; i < MTK_MAX_DEVS; i++) { @@ -4163,7 +4163,7 @@ static int mtk_probe(struct platform_device *pdev) err = register_netdev(eth->netdev[i]); if (err) { dev_err(eth->dev, "error bringing up device\n"); - goto err_deinit_mdio; + goto err_deinit_ppe; } else netif_info(eth, probe, eth->netdev[i], "mediatek frame engine at 0x%08lx, irq %d\n", @@ -4181,7 +4181,8 @@ static int mtk_probe(struct platform_device *pdev) return 0; -err_deinit_mdio: +err_deinit_ppe: + mtk_ppe_deinit(eth); mtk_mdio_cleanup(eth); err_free_dev: mtk_free_dev(eth); diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c index 2d8ca99f2467..784ecb2dc9fb 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.c +++ b/drivers/net/ethernet/mediatek/mtk_ppe.c @@ -737,7 +737,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, MTK_PPE_ENTRIES * soc->foe_entry_size, &ppe->foe_phys, GFP_KERNEL); if (!foe) - return NULL; + goto err_free_l2_flows; ppe->foe_table = foe; @@ -745,11 +745,26 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, sizeof(*ppe->foe_flow); ppe->foe_flow = devm_kzalloc(dev, foe_flow_size, GFP_KERNEL); if (!ppe->foe_flow) - return NULL; + goto err_free_l2_flows; mtk_ppe_debugfs_init(ppe, index); return ppe; + +err_free_l2_flows: + rhashtable_destroy(&ppe->l2_flows); + return NULL; +} + +void mtk_ppe_deinit(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(eth->ppe); i++) { + if (!eth->ppe[i]) + return; + rhashtable_destroy(ð->ppe[i]->l2_flows); + } } static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h index 0b7a67a958e4..a09c32539bcc 100644 --- a/drivers/net/ethernet/mediatek/mtk_ppe.h +++ b/drivers/net/ethernet/mediatek/mtk_ppe.h @@ -304,6 +304,7 @@ struct mtk_ppe { struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version, int index); +void mtk_ppe_deinit(struct mtk_eth *eth); void mtk_ppe_start(struct mtk_ppe *ppe); int mtk_ppe_stop(struct mtk_ppe *ppe); From 568fe84940ac0e4e0b2cd7751b8b4911f7b9c215 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 20 Nov 2022 15:28:38 +0800 Subject: [PATCH 629/963] ipv4: Fix error return code in fib_table_insert() In fib_table_insert(), if the alias was already inserted, but node not exist, the error code should be set before return from error handling path. Fixes: a6c76c17df02 ("ipv4: Notify route after insertion to the routing table") Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/r/20221120072838.2167047-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index c88bf856c443..74d403dbd2b4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1381,8 +1381,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb, /* The alias was already inserted, so the node must exist. */ l = l ? l : fib_find_node(t, &tp, key); - if (WARN_ON_ONCE(!l)) + if (WARN_ON_ONCE(!l)) { + err = -ENOENT; goto out_free_new_fa; + } if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) { From 391c18cf776eb4569ecda1f7794f360fe0a45a26 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 18 Nov 2022 22:44:41 +0900 Subject: [PATCH 630/963] 9p/xen: check logical size for buffer size trans_xen did not check the data fits into the buffer before copying from the xen ring, but we probably should. Add a check that just skips the request and return an error to userspace if it did not fit Tested-by: Stefano Stabellini Reviewed-by: Christian Schoenebeck Link: https://lkml.kernel.org/r/20221118135542.63400-1-asmadeus@codewreck.org Signed-off-by: Dominique Martinet --- net/9p/trans_xen.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index b15c64128c3e..aaa5fd364691 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -208,6 +208,14 @@ static void p9_xen_response(struct work_struct *work) continue; } + if (h.size > req->rc.capacity) { + dev_warn(&priv->dev->dev, + "requested packet size too big: %d for tag %d with capacity %zd\n", + h.size, h.tag, req->rc.capacity); + req->status = REQ_STATUS_ERROR; + goto recv_error; + } + memcpy(&req->rc, &h, sizeof(h)); req->rc.offset = 0; @@ -217,6 +225,7 @@ static void p9_xen_response(struct work_struct *work) masked_prod, &masked_cons, XEN_9PFS_RING_SIZE(ring)); +recv_error: virt_mb(); cons += h.size; ring->intf->in_cons = cons; From 44361e8cf9ddb23f17bdcc40ca944abf32e83e79 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 23 Nov 2022 09:10:42 +0100 Subject: [PATCH 631/963] fuse: lock inode unconditionally in fuse_fallocate() file_modified() must be called with inode lock held. fuse_fallocate() didn't lock the inode in case of just FALLOC_KEEP_SIZE flags value, which resulted in a kernel Warning in notify_change(). Lock the inode unconditionally, like all other fallocate implementations do. Reported-by: Pengfei Xu Reported-and-tested-by: syzbot+462da39f0667b357c4b6@syzkaller.appspotmail.com Fixes: 4a6f278d4827 ("fuse: add file_modified() to fallocate") Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 71bfb663aac5..89f4741728ba 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2963,11 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, .mode = mode }; int err; - bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || - (mode & (FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_ZERO_RANGE)); - - bool block_faults = FUSE_IS_DAX(inode) && lock_inode; + bool block_faults = FUSE_IS_DAX(inode) && + (!(mode & FALLOC_FL_KEEP_SIZE) || + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))); if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) @@ -2976,22 +2974,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fm->fc->no_fallocate) return -EOPNOTSUPP; - if (lock_inode) { - inode_lock(inode); - if (block_faults) { - filemap_invalidate_lock(inode->i_mapping); - err = fuse_dax_break_layouts(inode, 0, 0); - if (err) - goto out; - } + inode_lock(inode); + if (block_faults) { + filemap_invalidate_lock(inode->i_mapping); + err = fuse_dax_break_layouts(inode, 0, 0); + if (err) + goto out; + } - if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { - loff_t endbyte = offset + length - 1; + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + loff_t endbyte = offset + length - 1; - err = fuse_writeback_range(inode, offset, endbyte); - if (err) - goto out; - } + err = fuse_writeback_range(inode, offset, endbyte); + if (err) + goto out; } if (!(mode & FALLOC_FL_KEEP_SIZE) && @@ -3039,8 +3035,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (block_faults) filemap_invalidate_unlock(inode->i_mapping); - if (lock_inode) - inode_unlock(inode); + inode_unlock(inode); fuse_flush_time_update(inode); From 5eef2141776da02772c44ec406d6871a790761ee Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 16 Nov 2022 15:07:22 +0000 Subject: [PATCH 632/963] media: v4l2-dv-timings.c: fix too strict blanking sanity checks Sanity checks were added to verify the v4l2_bt_timings blanking fields in order to avoid integer overflows when userspace passes weird values. But that assumed that userspace would correctly fill in the front porch, backporch and sync values, but sometimes all you know is the total blanking, which is then assigned to just one of these fields. And that can fail with these checks. So instead set a maximum for the total horizontal and vertical blanking and check that each field remains below that. That is still sufficient to avoid integer overflows, but it also allows for more flexibility in how userspace fills in these fields. Signed-off-by: Hans Verkuil Fixes: 4b6d66a45ed3 ("media: v4l2-dv-timings: add sanity checks for blanking values") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-dv-timings.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 003c32fed3f7..942d0005c55e 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -145,6 +145,8 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, const struct v4l2_bt_timings *bt = &t->bt; const struct v4l2_bt_timings_cap *cap = &dvcap->bt; u32 caps = cap->capabilities; + const u32 max_vert = 10240; + u32 max_hor = 3 * bt->width; if (t->type != V4L2_DV_BT_656_1120) return false; @@ -166,14 +168,20 @@ bool v4l2_valid_dv_timings(const struct v4l2_dv_timings *t, if (!bt->interlaced && (bt->il_vbackporch || bt->il_vsync || bt->il_vfrontporch)) return false; - if (bt->hfrontporch > 2 * bt->width || - bt->hsync > 1024 || bt->hbackporch > 1024) + /* + * Some video receivers cannot properly separate the frontporch, + * backporch and sync values, and instead they only have the total + * blanking. That can be assigned to any of these three fields. + * So just check that none of these are way out of range. + */ + if (bt->hfrontporch > max_hor || + bt->hsync > max_hor || bt->hbackporch > max_hor) return false; - if (bt->vfrontporch > 4096 || - bt->vsync > 128 || bt->vbackporch > 4096) + if (bt->vfrontporch > max_vert || + bt->vsync > max_vert || bt->vbackporch > max_vert) return false; - if (bt->interlaced && (bt->il_vfrontporch > 4096 || - bt->il_vsync > 128 || bt->il_vbackporch > 4096)) + if (bt->interlaced && (bt->il_vfrontporch > max_vert || + bt->il_vsync > max_vert || bt->il_vbackporch > max_vert)) return false; return fnc == NULL || fnc(t, fnc_handle); } From 1c40cde6b5171d9c8dfc69be00464fd1c75e210b Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sun, 20 Nov 2022 14:24:38 +0800 Subject: [PATCH 633/963] arcnet: fix potential memory leak in com20020_probe() In com20020_probe(), if com20020_config() fails, dev and info will not be freed, which will lead to a memory leak. This patch adds freeing dev and info after com20020_config() fails to fix this bug. Compile tested only. Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Wang Hai Signed-off-by: David S. Miller --- drivers/net/arcnet/com20020_cs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 24150c933fcb..dc3253b318da 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -113,6 +113,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; + int ret = -ENOMEM; dev_dbg(&p_dev->dev, "com20020_attach()\n"); @@ -142,12 +143,18 @@ static int com20020_probe(struct pcmcia_device *p_dev) info->dev = dev; p_dev->priv = info; - return com20020_config(p_dev); + ret = com20020_config(p_dev); + if (ret) + goto fail_config; + return 0; + +fail_config: + free_arcdev(dev); fail_alloc_dev: kfree(info); fail_alloc_info: - return -ENOMEM; + return ret; } /* com20020_attach */ static void com20020_detach(struct pcmcia_device *link) From bac81f40c2c1484a2bd416b3fbf983f6e76488cd Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 21 Nov 2022 03:32:26 +0000 Subject: [PATCH 634/963] net: dm9051: Fix missing dev_kfree_skb() in dm9051_loop_rx() The dm9051_loop_rx() returns without release skb when dm9051_stop_mrcmd() returns error, free the skb to avoid this leak. Fixes: 2dc95a4d30ed ("net: Add dm9051 driver") Signed-off-by: Yuan Can Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/davicom/dm9051.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/davicom/dm9051.c b/drivers/net/ethernet/davicom/dm9051.c index a523ddda7609..de7105a84747 100644 --- a/drivers/net/ethernet/davicom/dm9051.c +++ b/drivers/net/ethernet/davicom/dm9051.c @@ -798,8 +798,10 @@ static int dm9051_loop_rx(struct board_info *db) } ret = dm9051_stop_mrcmd(db); - if (ret) + if (ret) { + dev_kfree_skb(skb); return ret; + } skb->protocol = eth_type_trans(skb, db->ndev); if (db->ndev->features & NETIF_F_RXCSUM) From af295e854a4e3813ffbdef26dbb6a4d6226c3ea1 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 21 Nov 2022 09:54:26 +0100 Subject: [PATCH 635/963] l2tp: Don't sleep and disable BH under writer-side sk_callback_lock When holding a reader-writer spin lock we cannot sleep. Calling setup_udp_tunnel_sock() with write lock held violates this rule, because we end up calling percpu_down_read(), which might sleep, as syzbot reports [1]: __might_resched.cold+0x222/0x26b kernel/sched/core.c:9890 percpu_down_read include/linux/percpu-rwsem.h:49 [inline] cpus_read_lock+0x1b/0x140 kernel/cpu.c:310 static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158 udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline] setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81 l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509 pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723 Trim the writer-side critical section for sk_callback_lock down to the minimum, so that it covers only operations on sk_user_data. Also, when grabbing the sk_callback_lock, we always need to disable BH, as Eric points out. Failing to do so leads to deadlocks because we acquire sk_callback_lock in softirq context, which can get stuck waiting on us if: 1) it runs on the same CPU, or CPU0 ---- lock(clock-AF_INET6); lock(clock-AF_INET6); 2) lock ordering leads to priority inversion CPU0 CPU1 ---- ---- lock(clock-AF_INET6); local_irq_disable(); lock(&tcp_hashinfo.bhash[i].lock); lock(clock-AF_INET6); lock(&tcp_hashinfo.bhash[i].lock); ... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock. [1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/ [2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/ [3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/ v2: - Check and set sk_user_data while holding sk_callback_lock for both L2TP encapsulation types (IP and UDP) (Tetsuo) Cc: Tom Parkin Cc: Tetsuo Handa Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock") Reported-by: Eric Dumazet Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 754fdda8a5f5..9a1415fe3fa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1474,11 +1474,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1507,8 +1508,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1522,16 +1521,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } From a487069e11b6527373f7c6f435d8998051d0b5d9 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Mon, 21 Nov 2022 13:54:55 +0100 Subject: [PATCH 636/963] net: usb: qmi_wwan: add u-blox 0x1342 composition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add RmNet support for LARA-L6. LARA-L6 module can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial interface and 1 CDC-ECM virtual network interface In RmNet mode LARA-L6 exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parset/alternative functions If 4: RMNET interface Signed-off-by: Davide Tronchin Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index afd6faa4c2ec..554d4e2a84a4 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1423,6 +1423,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ + {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ From 748064b54c99418f615aabff5755996cd9816969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santiago=20Ruano=20Rinc=C3=B3n?= Date: Mon, 21 Nov 2022 21:53:05 +0100 Subject: [PATCH 637/963] net/cdc_ncm: Fix multicast RX support for CDC NCM devices with ZLP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ZLP for DisplayLink ethernet devices was enabled in 6.0: 266c0190aee3 ("net/cdc_ncm: Enable ZLP for DisplayLink ethernet devices"). The related driver_info should be the "same as cdc_ncm_info, but with FLAG_SEND_ZLP". However, set_rx_mode that enables handling multicast traffic was missing in the new cdc_ncm_zlp_info. usbnet_cdc_update_filter rx mode was introduced in linux 5.9 with: e10dcb1b6ba7 ("net: cdc_ncm: hook into set_rx_mode to admit multicast traffic") Without this hook, multicast, and then IPv6 SLAAC, is broken. Fixes: 266c0190aee3 ("net/cdc_ncm: Enable ZLP for DisplayLink ethernet devices") Signed-off-by: Santiago Ruano Rincón Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8d5cbda33f66..0897fdb6254b 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1915,6 +1915,7 @@ static const struct driver_info cdc_ncm_zlp_info = { .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, + .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_WWAN */ From 4f2bea62cf3874c5a58e987b0b472f9fb57117a2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 16 Nov 2022 11:26:53 -0500 Subject: [PATCH 638/963] drm/amdgpu/psp: don't free PSP buffers on suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can reuse the same buffers on resume. v2: squash in S4 fix from Shikai Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2213 Reviewed-by: Christian König Tested-by: Guilherme G. Piccoli Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index effa7df3ddbf..7978307e1d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -172,6 +172,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); + mem_ctx->shared_bo = NULL; } static void psp_free_shared_bufs(struct psp_context *psp) @@ -182,6 +183,7 @@ static void psp_free_shared_bufs(struct psp_context *psp) /* free TMR memory buffer */ pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); + psp->tmr_bo = NULL; /* free xgmi shared memory */ psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context); @@ -743,7 +745,7 @@ static int psp_load_toc(struct psp_context *psp, /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { - int ret; + int ret = 0; int tmr_size; void *tmr_buf; void **pptr; @@ -770,10 +772,12 @@ static int psp_tmr_init(struct psp_context *psp) } } - pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; - ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + if (!psp->tmr_bo) { + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + } return ret; } @@ -2732,8 +2736,6 @@ static int psp_suspend(void *handle) } out: - psp_free_shared_bufs(psp); - return ret; } From 44035ec2fde1114254ee465f9ba3bb246b0b6283 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 14 Nov 2022 17:20:45 -0500 Subject: [PATCH 639/963] drm/amd/dc/dce120: Fix audio register mapping, stop triggering KASAN There's been a very long running bug that seems to have been neglected for a while, where amdgpu consistently triggers a KASAN error at start: BUG: KASAN: global-out-of-bounds in read_indirect_azalia_reg+0x1d4/0x2a0 [amdgpu] Read of size 4 at addr ffffffffc2274b28 by task modprobe/1889 After digging through amd's rather creative method for accessing registers, I eventually discovered the problem likely has to do with the fact that on my dce120 GPU there are supposedly 7 sets of audio registers. But we only define a register mapping for 6 sets. So, fix this and fix the KASAN warning finally. Signed-off-by: Lyude Paul Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 1b70b78e2fa1..af631085e88c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = { audio_regs(2), audio_regs(3), audio_regs(4), - audio_regs(5) + audio_regs(5), + audio_regs(6), }; #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\ From 5d82c82f1dbee264f7a94587adbbfee607706902 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Mon, 7 Nov 2022 15:18:47 -0500 Subject: [PATCH 640/963] drm/amd/display: Update soc bounding box for dcn32/dcn321 [Description] New values for soc bounding box and dummy pstate. Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 6 +++--- drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 12e17bcfca3f..9ca8120461a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) @@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) /* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */ if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) { clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true; - clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38; + clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us; @@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr) clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz; clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF; clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16; - clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38; + clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50; clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16; clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9; clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c index 432b4ecd01a7..f4b176599be7 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c @@ -126,9 +126,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .sr_enter_plus_exit_z8_time_us = 320, .writeback_latency_us = 12.0, .round_trip_ping_latency_dcfclk_cycles = 263, - .urgent_latency_pixel_data_only_us = 9.35, - .urgent_latency_pixel_mixed_with_vm_data_us = 9.35, - .urgent_latency_vm_data_only_us = 9.35, + .urgent_latency_pixel_data_only_us = 4, + .urgent_latency_pixel_mixed_with_vm_data_us = 4, + .urgent_latency_vm_data_only_us = 4, .fclk_change_latency_us = 20, .usr_retraining_latency_us = 2, .smn_latency_us = 2, @@ -156,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = { .dispclk_dppclk_vco_speed_mhz = 4300.0, .do_urgent_latency_adjustment = true, .urgent_latency_adjustment_fabric_clock_component_us = 1.0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 1000, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000, }; static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry) From dd2c028c1395d622df7ddd6837f8ab2dc94008ee Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 10 Nov 2022 12:13:47 -0500 Subject: [PATCH 641/963] drm/amd/display: Use viewport height for subvp mall allocation size [WHY?] MALL allocation size depends on the viewport height, not the addressable vertical lines, which will not match when scaling. [HOW?] Base MALL allocation size calculations off viewport height. Reviewed-by: Alvin Lee Reviewed-by: Martin Leung Acked-by: Brian Chang Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c index b03a7814e96d..fa3778849db1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c @@ -111,7 +111,7 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat mall_alloc_width_blk_aligned = full_vp_width_blk_aligned; /* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */ - mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) / + mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) / mblk_height * mblk_height + mblk_height; /* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c; From 2a5dd86a69ea5435f1a837bdb7fafcda609a7c91 Mon Sep 17 00:00:00 2001 From: Taimur Hassan Date: Fri, 11 Nov 2022 14:11:00 -0500 Subject: [PATCH 642/963] drm/amd/display: Avoid setting pixel rate divider to N/A [Why] Pixel rate divider values should never be set to N/A (0xF) as the K1/K2 field is only 1/2 bits wide. [How] Set valid divider values for virtual and FRL/DP2 cases. Reviewed-by: Nicholas Kazlauskas Acked-by: Brian Chang Signed-off-by: Taimur Hassan Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c | 7 +++++++ drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c | 6 ++---- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c | 4 +++- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 4 +--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c index 1bd7e0f327d8..389a8938ee45 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c @@ -96,6 +96,13 @@ static void dccg314_set_pixel_rate_div( struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA; + // Don't program 0xF into the register field. Not valid since + // K1 / K2 field is only 1 / 2 bits wide + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); + return; + } + dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2)) return; diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c index 588c1c71241f..a0741794db62 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c @@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; @@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig *k2_div = PIXEL_RATE_DIV_BY_2; else *k2_div = PIXEL_RATE_DIV_BY_4; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { + } else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) { if (two_pix_per_container) { *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_2; diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c index e4daed44ef5f..df4f25119142 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c @@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div( // Don't program 0xF into the register field. Not valid since // K1 / K2 field is only 1 / 2 bits wide - if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) + if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) { + BREAK_TO_DEBUGGER(); return; + } dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2); if (k1 == cur_k1 && k2 == cur_k2) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index ac41a763bb1d..d0b46a3e0155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1171,10 +1171,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); - if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL) - return odm_combine_factor; - if (is_dp_128b_132b_signal(pipe_ctx)) { + *k1_div = PIXEL_RATE_DIV_BY_1; *k2_div = PIXEL_RATE_DIV_BY_1; } else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) { *k1_div = PIXEL_RATE_DIV_BY_1; From e667ee3b0c049bf0c69426879586a2572bb28d26 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 11 Nov 2022 14:06:58 -0500 Subject: [PATCH 643/963] drm/amd/display: Use new num clk levels struct for max mclk index [WHY?] When calculating watermark and dlg values, the max mclk level index and associated speed are needed to find the correlated dummy latency value. Currently the incorrect index is given due to a clock manager refactor. [HOW?] Use num_memclk_level from num_entries_per_clk struct for getting the correct max mem speed. Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index 9ca8120461a7..2abe3967f7fb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -1910,7 +1910,7 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context, if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] == dm_dram_clock_change_unsupported) { - int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1; + int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1; min_dram_speed_mts = dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16; From a26a54fbe32b564ff868710d59fbe1a387a2cc7c Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Thu, 10 Nov 2022 14:40:20 -0500 Subject: [PATCH 644/963] drm/amd/display: Fix rotated cursor offset calculation [Why] Underflow is observed when cursor is still enabled when the cursor rectangle is outside the bounds of it's surface viewport. [How] Update parameters used to determine when cursor should be disabled. Reviewed-by: Martin Leung Acked-by: Brian Chang Signed-off-by: David Galiffi Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c | 34 +++++++++++++------ .../gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c | 28 ++++++++++----- .../gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c | 32 +++++++++++------ 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c index b9765b3899e1..ef52e6b6eccf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c @@ -436,34 +436,48 @@ void dpp1_set_cursor_position( uint32_t height) { struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; + int x_hotspot = pos->x_hotspot; + int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; + int cursor_height = (int)height; + int cursor_width = (int)width; uint32_t cur_en = pos->enable ? 1 : 0; - // Cursor width/height and hotspots need to be rotated for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { - swap(width, height); + swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } if (src_x_offset >= (int)param->viewport.width) cur_en = 0; /* not visible beyond right edge*/ - if (src_x_offset + (int)width <= 0) + if (src_x_offset + cursor_width <= 0) cur_en = 0; /* not visible beyond left edge*/ if (src_y_offset >= (int)param->viewport.height) cur_en = 0; /* not visible beyond bottom edge*/ - if (src_y_offset + (int)height <= 0) + if (src_y_offset + cursor_height <= 0) cur_en = 0; /* not visible beyond top edge*/ REG_UPDATE(CURSOR0_CONTROL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 52e201e9b091..a142a00bc432 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 938dba5249d4..4566bc7abf17 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -973,10 +973,12 @@ void hubp2_cursor_set_position( const struct dc_cursor_mi_param *param) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x; - int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y; + int x_pos = pos->x - param->viewport.x; + int y_pos = pos->y - param->viewport.y; int x_hotspot = pos->x_hotspot; int y_hotspot = pos->y_hotspot; + int src_x_offset = x_pos - pos->x_hotspot; + int src_y_offset = y_pos - pos->y_hotspot; int cursor_height = (int)hubp->curs_attr.height; int cursor_width = (int)hubp->curs_attr.width; uint32_t dst_x_offset; @@ -994,18 +996,26 @@ void hubp2_cursor_set_position( if (hubp->curs_attr.address.quad_part == 0) return; - // Rotated cursor width/height and hotspots tweaks for offset calculation + // Transform cursor width / height and hotspots for offset calculations if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) { swap(cursor_height, cursor_width); + swap(x_hotspot, y_hotspot); + if (param->rotation == ROTATION_ANGLE_90) { - src_x_offset = pos->x - pos->y_hotspot - param->viewport.x; - src_y_offset = pos->y - pos->x_hotspot - param->viewport.y; + // hotspot = (-y, x) + src_x_offset = x_pos - (cursor_width - x_hotspot); + src_y_offset = y_pos - y_hotspot; + } else if (param->rotation == ROTATION_ANGLE_270) { + // hotspot = (y, -x) + src_x_offset = x_pos - x_hotspot; + src_y_offset = y_pos - (cursor_height - y_hotspot); } } else if (param->rotation == ROTATION_ANGLE_180) { + // hotspot = (-x, -y) if (!param->mirror) - src_x_offset = pos->x - param->viewport.x; + src_x_offset = x_pos - (cursor_width - x_hotspot); - src_y_offset = pos->y - param->viewport.y; + src_y_offset = y_pos - (cursor_height - y_hotspot); } dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0; @@ -1042,8 +1052,8 @@ void hubp2_cursor_set_position( CURSOR_Y_POSITION, pos->y); REG_SET_2(CURSOR_HOT_SPOT, 0, - CURSOR_HOT_SPOT_X, x_hotspot, - CURSOR_HOT_SPOT_Y, y_hotspot); + CURSOR_HOT_SPOT_X, pos->x_hotspot, + CURSOR_HOT_SPOT_Y, pos->y_hotspot); REG_SET(CURSOR_DST_OFFSET, 0, CURSOR_DST_X_OFFSET, dst_x_offset); @@ -1052,8 +1062,8 @@ void hubp2_cursor_set_position( hubp->pos.cur_ctl.bits.cur_enable = cur_en; hubp->pos.position.bits.x_pos = pos->x; hubp->pos.position.bits.y_pos = pos->y; - hubp->pos.hot_spot.bits.x_hot = x_hotspot; - hubp->pos.hot_spot.bits.y_hot = y_hotspot; + hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot; + hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot; hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset; /* Cursor Rectangle Cache * Cursor bitmaps have different hotspot values From f2e1aa267f12b82e03927d1e918d2844ddd3eea5 Mon Sep 17 00:00:00 2001 From: lyndonli Date: Mon, 21 Nov 2022 09:08:42 +0800 Subject: [PATCH 645/963] drm/amd/pm: update driver if header for smu_13_0_7 update driver if header for smu_13_0_7 Signed-off-by: lyndonli Reviewed-by: Hawking Zhang Reviewed-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- .../inc/pmfw_if/smu13_driver_if_v13_0_7.h | 117 ++++++++++++------ drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 2 +- 2 files changed, 80 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h index 25c08f963f49..d6b13933a98f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h @@ -25,10 +25,10 @@ // *** IMPORTANT *** // PMFW TEAM: Always increment the interface version on any change to this file -#define SMU13_DRIVER_IF_VERSION 0x2C +#define SMU13_DRIVER_IF_VERSION 0x35 //Increment this version if SkuTable_t or BoardTable_t change -#define PPTABLE_VERSION 0x20 +#define PPTABLE_VERSION 0x27 #define NUM_GFXCLK_DPM_LEVELS 16 #define NUM_SOCCLK_DPM_LEVELS 8 @@ -96,7 +96,7 @@ #define FEATURE_MEM_TEMP_READ_BIT 47 #define FEATURE_ATHUB_MMHUB_PG_BIT 48 #define FEATURE_SOC_PCC_BIT 49 -#define FEATURE_SPARE_50_BIT 50 +#define FEATURE_EDC_PWRBRK_BIT 50 #define FEATURE_SPARE_51_BIT 51 #define FEATURE_SPARE_52_BIT 52 #define FEATURE_SPARE_53_BIT 53 @@ -282,15 +282,15 @@ typedef enum { } I2cControllerPort_e; typedef enum { - I2C_CONTROLLER_NAME_VR_GFX = 0, - I2C_CONTROLLER_NAME_VR_SOC, - I2C_CONTROLLER_NAME_VR_VMEMP, - I2C_CONTROLLER_NAME_VR_VDDIO, - I2C_CONTROLLER_NAME_LIQUID0, - I2C_CONTROLLER_NAME_LIQUID1, - I2C_CONTROLLER_NAME_PLX, - I2C_CONTROLLER_NAME_OTHER, - I2C_CONTROLLER_NAME_COUNT, + I2C_CONTROLLER_NAME_VR_GFX = 0, + I2C_CONTROLLER_NAME_VR_SOC, + I2C_CONTROLLER_NAME_VR_VMEMP, + I2C_CONTROLLER_NAME_VR_VDDIO, + I2C_CONTROLLER_NAME_LIQUID0, + I2C_CONTROLLER_NAME_LIQUID1, + I2C_CONTROLLER_NAME_PLX, + I2C_CONTROLLER_NAME_FAN_INTAKE, + I2C_CONTROLLER_NAME_COUNT, } I2cControllerName_e; typedef enum { @@ -302,6 +302,7 @@ typedef enum { I2C_CONTROLLER_THROTTLER_LIQUID0, I2C_CONTROLLER_THROTTLER_LIQUID1, I2C_CONTROLLER_THROTTLER_PLX, + I2C_CONTROLLER_THROTTLER_FAN_INTAKE, I2C_CONTROLLER_THROTTLER_INA3221, I2C_CONTROLLER_THROTTLER_COUNT, } I2cControllerThrottler_e; @@ -309,8 +310,9 @@ typedef enum { typedef enum { I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5, I2C_CONTROLLER_PROTOCOL_VR_IR35217, - I2C_CONTROLLER_PROTOCOL_TMP_TMP102A, + I2C_CONTROLLER_PROTOCOL_TMP_MAX31875, I2C_CONTROLLER_PROTOCOL_INA3221, + I2C_CONTROLLER_PROTOCOL_TMP_MAX6604, I2C_CONTROLLER_PROTOCOL_COUNT, } I2cControllerProtocol_e; @@ -690,6 +692,9 @@ typedef struct { #define PP_OD_FEATURE_UCLK_BIT 8 #define PP_OD_FEATURE_ZERO_FAN_BIT 9 #define PP_OD_FEATURE_TEMPERATURE_BIT 10 +#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11 +#define PP_OD_FEATURE_ASIC_TDC_BIT 12 +#define PP_OD_FEATURE_COUNT 13 typedef enum { PP_OD_POWER_FEATURE_ALWAYS_ENABLED, @@ -697,6 +702,11 @@ typedef enum { PP_OD_POWER_FEATURE_ALWAYS_DISABLED, } PP_OD_POWER_FEATURE_e; +typedef enum { + FAN_MODE_AUTO = 0, + FAN_MODE_MANUAL_LINEAR, +} FanMode_e; + typedef struct { uint32_t FeatureCtrlMask; @@ -708,8 +718,8 @@ typedef struct { uint8_t RuntimePwrSavingFeaturesCtrl; //Frequency changes - int16_t GfxclkFmin; // MHz - int16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -730,7 +740,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; uint32_t MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround } OverDriveTable_t; @@ -748,8 +763,8 @@ typedef struct { uint8_t IdlePwrSavingFeaturesCtrl; uint8_t RuntimePwrSavingFeaturesCtrl; - uint16_t GfxclkFmin; // MHz - uint16_t GfxclkFmax; // MHz + int16_t GfxclkFmin; // MHz + int16_t GfxclkFmax; // MHz uint16_t UclkFmin; // MHz uint16_t UclkFmax; // MHz @@ -769,7 +784,12 @@ typedef struct { uint8_t MaxOpTemp; uint8_t Padding[4]; - uint32_t Spare[12]; + uint16_t GfxVoltageFullCtrlMode; + uint16_t GfxclkFullCtrlMode; + uint16_t UclkFullCtrlMode; + int16_t AsicTdc; + + uint32_t Spare[10]; } OverDriveLimits_t; @@ -903,7 +923,8 @@ typedef struct { uint16_t FanStartTempMin; uint16_t FanStartTempMax; - uint32_t Spare[12]; + uint16_t PowerMinPpt0[POWER_SOURCE_COUNT]; + uint32_t Spare[11]; } MsgLimits_t; @@ -1086,11 +1107,13 @@ typedef struct { uint32_t GfxoffSpare[15]; // GFX GPO - float DfllBtcMasterScalerM; + uint32_t DfllBtcMasterScalerM; int32_t DfllBtcMasterScalerB; - float DfllBtcSlaveScalerM; + uint32_t DfllBtcSlaveScalerM; int32_t DfllBtcSlaveScalerB; - uint32_t GfxGpoSpare[12]; + uint32_t DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg + uint32_t DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg + uint32_t GfxGpoSpare[10]; // GFX DCS @@ -1106,7 +1129,10 @@ typedef struct { uint16_t DcsTimeout; //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin. - uint32_t DcsSpare[16]; + uint32_t DcsSpare[14]; + + // UCLK section + uint16_t ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS]; // In MHz // UCLK section uint8_t UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations @@ -1163,13 +1189,14 @@ typedef struct { uint16_t IntakeTempHighIntakeAcousticLimit; uint16_t IntakeTempAcouticLimitReleaseRate; - uint16_t FanStalledTempLimitOffset; + int16_t FanAbnormalTempLimitOffset; uint16_t FanStalledTriggerRpm; - uint16_t FanAbnormalTriggerRpm; - uint16_t FanPadding; - - uint32_t FanSpare[14]; + uint16_t FanAbnormalTriggerRpmCoeff; + uint16_t FanAbnormalDetectionEnable; + uint8_t FanIntakeSensorSupport; + uint8_t FanIntakePadding[3]; + uint32_t FanSpare[13]; // SECTION: VDD_GFX AVFS uint8_t OverrideGfxAvfsFuses; @@ -1193,7 +1220,6 @@ typedef struct { uint32_t dGbV_dT_vmin; uint32_t dGbV_dT_vmax; - //Unused: PMFW-9370 uint32_t V2F_vmin_range_low; uint32_t V2F_vmin_range_high; uint32_t V2F_vmax_range_low; @@ -1238,8 +1264,21 @@ typedef struct { // SECTION: Advanced Options uint32_t DebugOverrides; + // Section: Total Board Power idle vs active coefficients + uint8_t TotalBoardPowerSupport; + uint8_t TotalBoardPowerPadding[3]; + + int16_t TotalIdleBoardPowerM; + int16_t TotalIdleBoardPowerB; + int16_t TotalBoardPowerM; + int16_t TotalBoardPowerB; + + QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT]; + QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT]; + // SECTION: Sku Reserved - uint32_t Spare[64]; + uint32_t Spare[43]; // Padding for MMHUB - do not modify this uint32_t MmHubPadding[8]; @@ -1304,7 +1343,8 @@ typedef struct { // SECTION: Clock Spread Spectrum // UCLK Spread Spectrum - uint16_t UclkSpreadPadding; + uint8_t UclkTrainingModeSpreadPercent; // Q4.4 + uint8_t UclkSpreadPadding; uint16_t UclkSpreadFreq; // kHz // UCLK Spread Spectrum @@ -1317,11 +1357,7 @@ typedef struct { // Section: Memory Config uint8_t DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e - uint8_t PaddingMem1[3]; - - // Section: Total Board Power - uint16_t TotalBoardPower; //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power - uint16_t BoardPowerPadding; + uint8_t PaddingMem1[7]; // SECTION: UMC feature flags uint8_t HsrEnabled; @@ -1423,8 +1459,11 @@ typedef struct { uint16_t Vcn1ActivityPercentage ; uint32_t EnergyAccumulator; - uint16_t AverageSocketPower ; + uint16_t AverageSocketPower; + uint16_t AverageTotalBoardPower; + uint16_t AvgTemperature[TEMP_COUNT]; + uint16_t AvgTemperatureFanIntake; uint8_t PcieRate ; uint8_t PcieWidth ; @@ -1592,5 +1631,7 @@ typedef struct { #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0 0x5 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3 0x6 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING 0x7 +#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL 0x8 +#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY 0x9 #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index b7f4569aff2a..865d6358918d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -31,7 +31,7 @@ #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32 -#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C +#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500 //500ms From 3cb93f390453cde4d6afda1587aaa00e75e09617 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Wed, 16 Nov 2022 17:08:22 +0800 Subject: [PATCH 646/963] drm/amdgpu: fix use-after-free during gpu recovery [Why] [ 754.862560] refcount_t: underflow; use-after-free. [ 754.862898] Call Trace: [ 754.862903] [ 754.862913] amdgpu_job_free_cb+0xc2/0xe1 [amdgpu] [ 754.863543] drm_sched_main.cold+0x34/0x39 [amd_sched] [How] The fw_fence may be not init, check whether dma_fence_init is performed before job free Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cd968e781077..f4a3122352de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -169,7 +169,11 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sched_sync); - dma_fence_put(&job->hw_fence); + /* only put the hw fence if has embedded fence */ + if (!job->hw_fence.ops) + kfree(job); + else + dma_fence_put(&job->hw_fence); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, From a6e1775da04ab042bc9e2e42399fa25714c253da Mon Sep 17 00:00:00 2001 From: Tsung-hua Lin Date: Wed, 9 Nov 2022 12:54:22 +0800 Subject: [PATCH 647/963] drm/amd/display: No display after resume from WB/CB [why] First MST sideband message returns AUX_RET_ERROR_HPD_DISCON on certain intel platform. Aux transaction considered failure if HPD unexpected pulled low. The actual aux transaction success in such case, hence do not return error. [how] Not returning error when AUX_RET_ERROR_HPD_DISCON detected on the first sideband message. v2: squash in fix (Alex) Reviewed-by: Jerry Zuo Acked-by: Brian Chang Signed-off-by: Tsung-hua Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 636e6d640b4e..512c32327eb1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1372,7 +1372,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), + }, + }, {} + /* TODO: refactor this from a fixed table to a dynamic option */ }; static void retrieve_dmi_info(struct amdgpu_display_manager *dm) From 602ad43c3cd8f15cbb25ce9bb494129edb2024ed Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Nov 2022 12:34:14 -0500 Subject: [PATCH 648/963] drm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read" This partially reverts 20543be93ca45968f344261c1a997177e51bd7e1. Calling drm_connector_update_edid_property() in amdgpu_connector_free_edid() causes a noticeable pause in the system every 10 seconds on polled outputs so revert this part of the change. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2257 Cc: Claudio Suarez Acked-by: Luben Tuikov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 491d4846fc02..cfb262911bfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -328,7 +328,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector) kfree(amdgpu_connector->edid); amdgpu_connector->edid = NULL; - drm_connector_update_edid_property(connector, NULL); } static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector) From ecb41b71ef90cf4741bcc3286b769dda746b67e6 Mon Sep 17 00:00:00 2001 From: Jane Jian Date: Wed, 16 Nov 2022 18:22:52 +0800 Subject: [PATCH 649/963] drm/amdgpu/vcn: re-use original vcn0 doorbell value root cause that S2A need to use deduct offset flag. after setting this flag, vcn0 doorbell value works. so return it as before Signed-off-by: Jane Jian Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h | 1 - drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index f772bb499f3e..0312c71c3af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -32,7 +32,6 @@ #define RB_ENABLED (1 << 0) #define RB4_ENABLED (1 << 1) -#define MMSCH_DOORBELL_OFFSET 0x8 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 897a5ce9c9da..dcc49b01bd59 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,7 +100,6 @@ static int vcn_v4_0_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - int vcn_doorbell_index = 0; r = amdgpu_vcn_sw_init(adev); if (r) @@ -112,12 +111,6 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - if (amdgpu_sriov_vf(adev)) { - vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET; - /* get DWORD offset */ - vcn_doorbell_index = vcn_doorbell_index << 1; - } - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -135,7 +128,7 @@ static int vcn_v4_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; From 9ac74f0666ceab0b1047e9d59be846a3345e4e98 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:08:29 +0000 Subject: [PATCH 650/963] s390/ap: fix memory leak in ap_init_qci_info() If kzalloc() for 'ap_qci_info_old' failed, 'ap_qci_info' shold be freed before return. Otherwise it is a memory leak. Link: https://lore.kernel.org/r/20221114110830.542246-1-weiyongjun@huaweicloud.com Fixes: 283915850a44 ("s390/ap: notify drivers on config changed and scan complete callbacks") Signed-off-by: Wei Yongjun Signed-off-by: Harald Freudenberger Signed-off-by: Alexander Gordeev --- drivers/s390/crypto/ap_bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 59ac98f2bd27..b02c631f3b71 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -233,8 +233,11 @@ static void __init ap_init_qci_info(void) if (!ap_qci_info) return; ap_qci_info_old = kzalloc(sizeof(*ap_qci_info_old), GFP_KERNEL); - if (!ap_qci_info_old) + if (!ap_qci_info_old) { + kfree(ap_qci_info); + ap_qci_info = NULL; return; + } if (ap_fetch_qci_info(ap_qci_info) != 0) { kfree(ap_qci_info); kfree(ap_qci_info_old); From 8fe97d47b52ae1ad130470b1780f0ded4ba609a4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 20 Nov 2022 13:43:03 +0100 Subject: [PATCH 651/963] btrfs: use kvcalloc in btrfs_get_dev_zone_info Otherwise the kernel memory allocator seems to be unhappy about failing order 6 allocations for the zones array, that cause 100% reproducible mount failures in my qemu setup: [26.078981] mount: page allocation failure: order:6, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null) [26.079741] CPU: 0 PID: 2965 Comm: mount Not tainted 6.1.0-rc5+ #185 [26.080181] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [26.080950] Call Trace: [26.081132] [26.081291] dump_stack_lvl+0x56/0x6f [26.081554] warn_alloc+0x117/0x140 [26.081808] ? __alloc_pages_direct_compact+0x1b5/0x300 [26.082174] __alloc_pages_slowpath.constprop.0+0xd0e/0xde0 [26.082569] __alloc_pages+0x32a/0x340 [26.082836] __kmalloc_large_node+0x4d/0xa0 [26.083133] ? trace_kmalloc+0x29/0xd0 [26.083399] kmalloc_large+0x14/0x60 [26.083654] btrfs_get_dev_zone_info+0x1b9/0xc00 [26.083980] ? _raw_spin_unlock_irqrestore+0x28/0x50 [26.084328] btrfs_get_dev_zone_info_all_devices+0x54/0x80 [26.084708] open_ctree+0xed4/0x1654 [26.084974] btrfs_mount_root.cold+0x12/0xde [26.085288] ? lock_is_held_type+0xe2/0x140 [26.085603] legacy_get_tree+0x28/0x50 [26.085876] vfs_get_tree+0x1d/0xb0 [26.086139] vfs_kern_mount.part.0+0x6c/0xb0 [26.086456] btrfs_mount+0x118/0x3a0 [26.086728] ? lock_is_held_type+0xe2/0x140 [26.087043] legacy_get_tree+0x28/0x50 [26.087323] vfs_get_tree+0x1d/0xb0 [26.087587] path_mount+0x2ba/0xbe0 [26.087850] ? _raw_spin_unlock_irqrestore+0x38/0x50 [26.088217] __x64_sys_mount+0xfe/0x140 [26.088506] do_syscall_64+0x35/0x80 [26.088776] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 5b316468983d ("btrfs: get zone information of zoned block devices") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 44d8177eeffc..c9e2b0c85309 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -467,7 +467,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) goto out; } - zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); + zones = kvcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); if (!zones) { ret = -ENOMEM; goto out; @@ -586,7 +586,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) } - kfree(zones); + kvfree(zones); switch (bdev_zoned_model(bdev)) { case BLK_ZONED_HM: @@ -618,7 +618,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) return 0; out: - kfree(zones); + kvfree(zones); out_free_zone_info: btrfs_destroy_dev_zone_info(device); From 796787c978efbbdb50e245718c784eb94f59eac4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 21 Nov 2022 10:23:22 +0000 Subject: [PATCH 652/963] btrfs: do not modify log tree while holding a leaf from fs tree locked When logging an inode in full mode, or when logging xattrs or when logging the dir index items of a directory, we are modifying the log tree while holding a read lock on a leaf from the fs/subvolume tree. This can lead to a deadlock in rare circumstances, but it is a real possibility, and it was recently reported by syzbot with the following trace from lockdep: WARNING: possible circular locking dependency detected 6.1.0-rc5-next-20221116-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.1/16154 is trying to acquire lock: ffff88807e3084a0 (&delayed_node->mutex){+.+.}-{3:3}, at: __btrfs_release_delayed_node.part.0+0xa1/0xf30 fs/btrfs/delayed-inode.c:256 but task is already holding lock: ffff88807df33078 (btrfs-log-00){++++}-{3:3}, at: __btrfs_tree_lock+0x32/0x3d0 fs/btrfs/locking.c:197 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (btrfs-log-00){++++}-{3:3}: down_read_nested+0x9e/0x450 kernel/locking/rwsem.c:1634 __btrfs_tree_read_lock+0x32/0x350 fs/btrfs/locking.c:135 btrfs_tree_read_lock fs/btrfs/locking.c:141 [inline] btrfs_read_lock_root_node+0x82/0x3a0 fs/btrfs/locking.c:280 btrfs_search_slot_get_root fs/btrfs/ctree.c:1678 [inline] btrfs_search_slot+0x3ca/0x2c70 fs/btrfs/ctree.c:1998 btrfs_lookup_csum+0x116/0x3f0 fs/btrfs/file-item.c:209 btrfs_csum_file_blocks+0x40e/0x1370 fs/btrfs/file-item.c:1021 log_csums.isra.0+0x244/0x2d0 fs/btrfs/tree-log.c:4258 copy_items.isra.0+0xbfb/0xed0 fs/btrfs/tree-log.c:4403 copy_inode_items_to_log+0x13d6/0x1d90 fs/btrfs/tree-log.c:5873 btrfs_log_inode+0xb19/0x4680 fs/btrfs/tree-log.c:6495 btrfs_log_inode_parent+0x890/0x2a20 fs/btrfs/tree-log.c:6982 btrfs_log_dentry_safe+0x59/0x80 fs/btrfs/tree-log.c:7083 btrfs_sync_file+0xa41/0x13c0 fs/btrfs/file.c:1921 vfs_fsync_range+0x13e/0x230 fs/sync.c:188 generic_write_sync include/linux/fs.h:2856 [inline] iomap_dio_complete+0x73a/0x920 fs/iomap/direct-io.c:128 btrfs_direct_write fs/btrfs/file.c:1536 [inline] btrfs_do_write_iter+0xba2/0x1470 fs/btrfs/file.c:1668 call_write_iter include/linux/fs.h:2160 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 vfs_iter_write+0x74/0xa0 fs/read_write.c:902 iter_file_splice_write+0x745/0xc90 fs/splice.c:686 do_splice_from fs/splice.c:764 [inline] direct_splice_actor+0x114/0x180 fs/splice.c:931 splice_direct_to_actor+0x335/0x8a0 fs/splice.c:886 do_splice_direct+0x1ab/0x280 fs/splice.c:974 do_sendfile+0xb19/0x1270 fs/read_write.c:1255 __do_sys_sendfile64 fs/read_write.c:1323 [inline] __se_sys_sendfile64 fs/read_write.c:1309 [inline] __x64_sys_sendfile64+0x259/0x2c0 fs/read_write.c:1309 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd -> #1 (btrfs-tree-00){++++}-{3:3}: __lock_release kernel/locking/lockdep.c:5382 [inline] lock_release+0x371/0x810 kernel/locking/lockdep.c:5688 up_write+0x2a/0x520 kernel/locking/rwsem.c:1614 btrfs_tree_unlock_rw fs/btrfs/locking.h:189 [inline] btrfs_unlock_up_safe+0x1e3/0x290 fs/btrfs/locking.c:238 search_leaf fs/btrfs/ctree.c:1832 [inline] btrfs_search_slot+0x265e/0x2c70 fs/btrfs/ctree.c:2074 btrfs_insert_empty_items+0xbd/0x1c0 fs/btrfs/ctree.c:4133 btrfs_insert_delayed_item+0x826/0xfa0 fs/btrfs/delayed-inode.c:746 btrfs_insert_delayed_items fs/btrfs/delayed-inode.c:824 [inline] __btrfs_commit_inode_delayed_items fs/btrfs/delayed-inode.c:1111 [inline] __btrfs_run_delayed_items+0x280/0x590 fs/btrfs/delayed-inode.c:1153 flush_space+0x147/0xe90 fs/btrfs/space-info.c:728 btrfs_async_reclaim_metadata_space+0x541/0xc10 fs/btrfs/space-info.c:1086 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 -> #0 (&delayed_node->mutex){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:3097 [inline] check_prevs_add kernel/locking/lockdep.c:3216 [inline] validate_chain kernel/locking/lockdep.c:3831 [inline] __lock_acquire+0x2a43/0x56d0 kernel/locking/lockdep.c:5055 lock_acquire kernel/locking/lockdep.c:5668 [inline] lock_acquire+0x1e3/0x630 kernel/locking/lockdep.c:5633 __mutex_lock_common kernel/locking/mutex.c:603 [inline] __mutex_lock+0x12f/0x1360 kernel/locking/mutex.c:747 __btrfs_release_delayed_node.part.0+0xa1/0xf30 fs/btrfs/delayed-inode.c:256 __btrfs_release_delayed_node fs/btrfs/delayed-inode.c:251 [inline] btrfs_release_delayed_node fs/btrfs/delayed-inode.c:281 [inline] btrfs_remove_delayed_node+0x52/0x60 fs/btrfs/delayed-inode.c:1285 btrfs_evict_inode+0x511/0xf30 fs/btrfs/inode.c:5554 evict+0x2ed/0x6b0 fs/inode.c:664 dispose_list+0x117/0x1e0 fs/inode.c:697 prune_icache_sb+0xeb/0x150 fs/inode.c:896 super_cache_scan+0x391/0x590 fs/super.c:106 do_shrink_slab+0x464/0xce0 mm/vmscan.c:843 shrink_slab_memcg mm/vmscan.c:912 [inline] shrink_slab+0x388/0x660 mm/vmscan.c:991 shrink_node_memcgs mm/vmscan.c:6088 [inline] shrink_node+0x93d/0x1f30 mm/vmscan.c:6117 shrink_zones mm/vmscan.c:6355 [inline] do_try_to_free_pages+0x3b4/0x17a0 mm/vmscan.c:6417 try_to_free_mem_cgroup_pages+0x3a4/0xa70 mm/vmscan.c:6732 reclaim_high.constprop.0+0x182/0x230 mm/memcontrol.c:2393 mem_cgroup_handle_over_high+0x190/0x520 mm/memcontrol.c:2578 try_charge_memcg+0xe0c/0x12f0 mm/memcontrol.c:2816 try_charge mm/memcontrol.c:2827 [inline] charge_memcg+0x90/0x3b0 mm/memcontrol.c:6889 __mem_cgroup_charge+0x2b/0x90 mm/memcontrol.c:6910 mem_cgroup_charge include/linux/memcontrol.h:667 [inline] __filemap_add_folio+0x615/0xf80 mm/filemap.c:852 filemap_add_folio+0xaf/0x1e0 mm/filemap.c:934 __filemap_get_folio+0x389/0xd80 mm/filemap.c:1976 pagecache_get_page+0x2e/0x280 mm/folio-compat.c:104 find_or_create_page include/linux/pagemap.h:612 [inline] alloc_extent_buffer+0x2b9/0x1580 fs/btrfs/extent_io.c:4588 btrfs_init_new_buffer fs/btrfs/extent-tree.c:4869 [inline] btrfs_alloc_tree_block+0x2e1/0x1320 fs/btrfs/extent-tree.c:4988 __btrfs_cow_block+0x3b2/0x1420 fs/btrfs/ctree.c:440 btrfs_cow_block+0x2fa/0x950 fs/btrfs/ctree.c:595 btrfs_search_slot+0x11b0/0x2c70 fs/btrfs/ctree.c:2038 btrfs_update_root+0xdb/0x630 fs/btrfs/root-tree.c:137 update_log_root fs/btrfs/tree-log.c:2841 [inline] btrfs_sync_log+0xbfb/0x2870 fs/btrfs/tree-log.c:3064 btrfs_sync_file+0xdb9/0x13c0 fs/btrfs/file.c:1947 vfs_fsync_range+0x13e/0x230 fs/sync.c:188 generic_write_sync include/linux/fs.h:2856 [inline] iomap_dio_complete+0x73a/0x920 fs/iomap/direct-io.c:128 btrfs_direct_write fs/btrfs/file.c:1536 [inline] btrfs_do_write_iter+0xba2/0x1470 fs/btrfs/file.c:1668 call_write_iter include/linux/fs.h:2160 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 vfs_iter_write+0x74/0xa0 fs/read_write.c:902 iter_file_splice_write+0x745/0xc90 fs/splice.c:686 do_splice_from fs/splice.c:764 [inline] direct_splice_actor+0x114/0x180 fs/splice.c:931 splice_direct_to_actor+0x335/0x8a0 fs/splice.c:886 do_splice_direct+0x1ab/0x280 fs/splice.c:974 do_sendfile+0xb19/0x1270 fs/read_write.c:1255 __do_sys_sendfile64 fs/read_write.c:1323 [inline] __se_sys_sendfile64 fs/read_write.c:1309 [inline] __x64_sys_sendfile64+0x259/0x2c0 fs/read_write.c:1309 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd other info that might help us debug this: Chain exists of: &delayed_node->mutex --> btrfs-tree-00 --> btrfs-log-00 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(btrfs-log-00); lock(btrfs-tree-00); lock(btrfs-log-00); lock(&delayed_node->mutex); Holding a read lock on a leaf from a fs/subvolume tree creates a nasty lock dependency when we are COWing extent buffers for the log tree and we have two tasks modifying the log tree, with each one in one of the following 2 scenarios: 1) Modifying the log tree triggers an extent buffer allocation while holding a write lock on a parent extent buffer from the log tree. Allocating the pages for an extent buffer, or the extent buffer struct, can trigger inode eviction and finally the inode eviction will trigger a release/remove of a delayed node, which requires taking the delayed node's mutex; 2) Allocating a metadata extent for a log tree can trigger the async reclaim thread and make us wait for it to release enough space and unblock our reservation ticket. The reclaim thread can start flushing delayed items, and that in turn results in the need to lock delayed node mutexes and in the need to write lock extent buffers of a subvolume tree - all this while holding a write lock on the parent extent buffer in the log tree. So one task in scenario 1) running in parallel with another task in scenario 2) could lead to a deadlock, one wanting to lock a delayed node mutex while having a read lock on a leaf from the subvolume, while the other is holding the delayed node's mutex and wants to write lock the same subvolume leaf for flushing delayed items. Fix this by cloning the leaf of the fs/subvolume tree, release/unlock the fs/subvolume leaf and use the clone leaf instead. Reported-by: syzbot+9b7c21f486f5e7f8d029@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/000000000000ccc93c05edc4d8cf@google.com/ CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 59 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 813986e38258..c3cf3dabe0b1 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3694,15 +3694,29 @@ static int process_dir_items_leaf(struct btrfs_trans_handle *trans, u64 *last_old_dentry_offset) { struct btrfs_root *log = inode->root->log_root; - struct extent_buffer *src = path->nodes[0]; - const int nritems = btrfs_header_nritems(src); + struct extent_buffer *src; + const int nritems = btrfs_header_nritems(path->nodes[0]); const u64 ino = btrfs_ino(inode); bool last_found = false; int batch_start = 0; int batch_size = 0; int i; - for (i = path->slots[0]; i < nritems; i++) { + /* + * We need to clone the leaf, release the read lock on it, and use the + * clone before modifying the log tree. See the comment at copy_items() + * about why we need to do this. + */ + src = btrfs_clone_extent_buffer(path->nodes[0]); + if (!src) + return -ENOMEM; + + i = path->slots[0]; + btrfs_release_path(path); + path->nodes[0] = src; + path->slots[0] = i; + + for (; i < nritems; i++) { struct btrfs_dir_item *di; struct btrfs_key key; int ret; @@ -4303,7 +4317,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, { struct btrfs_root *log = inode->root->log_root; struct btrfs_file_extent_item *extent; - struct extent_buffer *src = src_path->nodes[0]; + struct extent_buffer *src; int ret = 0; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -4314,6 +4328,43 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, const bool skip_csum = (inode->flags & BTRFS_INODE_NODATASUM); const u64 i_size = i_size_read(&inode->vfs_inode); + /* + * To keep lockdep happy and avoid deadlocks, clone the source leaf and + * use the clone. This is because otherwise we would be changing the log + * tree, to insert items from the subvolume tree or insert csum items, + * while holding a read lock on a leaf from the subvolume tree, which + * creates a nasty lock dependency when COWing log tree nodes/leaves: + * + * 1) Modifying the log tree triggers an extent buffer allocation while + * holding a write lock on a parent extent buffer from the log tree. + * Allocating the pages for an extent buffer, or the extent buffer + * struct, can trigger inode eviction and finally the inode eviction + * will trigger a release/remove of a delayed node, which requires + * taking the delayed node's mutex; + * + * 2) Allocating a metadata extent for a log tree can trigger the async + * reclaim thread and make us wait for it to release enough space and + * unblock our reservation ticket. The reclaim thread can start + * flushing delayed items, and that in turn results in the need to + * lock delayed node mutexes and in the need to write lock extent + * buffers of a subvolume tree - all this while holding a write lock + * on the parent extent buffer in the log tree. + * + * So one task in scenario 1) running in parallel with another task in + * scenario 2) could lead to a deadlock, one wanting to lock a delayed + * node mutex while having a read lock on a leaf from the subvolume, + * while the other is holding the delayed node's mutex and wants to + * write lock the same subvolume leaf for flushing delayed items. + */ + src = btrfs_clone_extent_buffer(src_path->nodes[0]); + if (!src) + return -ENOMEM; + + i = src_path->slots[0]; + btrfs_release_path(src_path); + src_path->nodes[0] = src; + src_path->slots[0] = i; + ins_data = kmalloc(nr * sizeof(struct btrfs_key) + nr * sizeof(u32), GFP_NOFS); if (!ins_data) From ffdbb44f2f23f963b8f5672e35c3a26088177a62 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 22 Nov 2022 19:50:02 +0800 Subject: [PATCH 653/963] btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs() Although kset_unregister() can eventually remove all attribute files, explicitly rolling back with the matching function makes the code logic look clearer. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Qu Wenruo Signed-off-by: Zhen Lei Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 699b54b3acaa..74fef1f49c35 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -2321,8 +2321,11 @@ int __init btrfs_init_sysfs(void) #ifdef CONFIG_BTRFS_DEBUG ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group); - if (ret) - goto out2; + if (ret) { + sysfs_unmerge_group(&btrfs_kset->kobj, + &btrfs_static_feature_attr_group); + goto out_remove_group; + } #endif return 0; From 8cfa238a48f34038464b99d0b4825238c2687181 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 14 Nov 2022 10:57:58 +0800 Subject: [PATCH 654/963] ixgbevf: Fix resource leak in ixgbevf_init_module() ixgbevf_init_module() won't destroy the workqueue created by create_singlethread_workqueue() when pci_register_driver() failed. Add destroy_workqueue() in fail path to prevent the resource leak. Similar to the handling of u132_hcd_init in commit f276e002793c ("usb: u132-hcd: fix resource leak") Fixes: 40a13e2493c9 ("ixgbevf: Use a private workqueue to avoid certain possible hangs") Signed-off-by: Shang XiaoJing Reviewed-by: Saeed Mahameed Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 99933e89717a..e338fa572793 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4869,6 +4869,8 @@ static struct pci_driver ixgbevf_driver = { **/ static int __init ixgbevf_init_module(void) { + int err; + pr_info("%s\n", ixgbevf_driver_string); pr_info("%s\n", ixgbevf_copyright); ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name); @@ -4877,7 +4879,13 @@ static int __init ixgbevf_init_module(void) return -ENOMEM; } - return pci_register_driver(&ixgbevf_driver); + err = pci_register_driver(&ixgbevf_driver); + if (err) { + destroy_workqueue(ixgbevf_wq); + return err; + } + + return 0; } module_init(ixgbevf_init_module); From 479dd06149425b9e00477f52200872587af76a48 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Wed, 16 Nov 2022 09:27:25 +0800 Subject: [PATCH 655/963] i40e: Fix error handling in i40e_init_module() i40e_init_module() won't free the debugfs directory created by i40e_dbg_init() when pci_register_driver() failed. Add fail path to call i40e_dbg_exit() to remove the debugfs entries to prevent the bug. i40e: Intel(R) Ethernet Connection XL710 Network Driver i40e: Copyright (c) 2013 - 2019 Intel Corporation. debugfs: Directory 'i40e' with parent '/' already present! Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Shang XiaoJing Reviewed-by: Leon Romanovsky Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b5dcd15ced36..b3cb587a2032 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -16644,6 +16644,8 @@ static struct pci_driver i40e_driver = { **/ static int __init i40e_init_module(void) { + int err; + pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string); pr_info("%s: %s\n", i40e_driver_name, i40e_copyright); @@ -16661,7 +16663,14 @@ static int __init i40e_init_module(void) } i40e_dbg_init(); - return pci_register_driver(&i40e_driver); + err = pci_register_driver(&i40e_driver); + if (err) { + destroy_workqueue(i40e_wq); + i40e_dbg_exit(); + return err; + } + + return 0; } module_init(i40e_init_module); From 771a794c0a3c3e7f0d86cc34be4f9537e8c0a20c Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 08:26:39 +0000 Subject: [PATCH 656/963] fm10k: Fix error handling in fm10k_init_module() A problem about modprobe fm10k failed is triggered with the following log given: Intel(R) Ethernet Switch Host Interface Driver Copyright(c) 2013 - 2019 Intel Corporation. debugfs: Directory 'fm10k' with parent '/' already present! The reason is that fm10k_init_module() returns fm10k_register_pci_driver() directly without checking its return value, if fm10k_register_pci_driver() failed, it returns without removing debugfs and destroy workqueue, resulting the debugfs of fm10k can never be created later and leaks the workqueue. fm10k_init_module() alloc_workqueue() fm10k_dbg_init() # create debugfs fm10k_register_pci_driver() pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without remove debugfs and destroy workqueue Fix by remove debugfs and destroy workqueue when fm10k_register_pci_driver() returns error. Fixes: 7461fd913afe ("fm10k: Add support for debugfs") Fixes: b382bb1b3e2d ("fm10k: use separate workqueue for fm10k driver") Signed-off-by: Yuan Can Reviewed-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 4a6630586ec9..fc373472e4e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -32,6 +32,8 @@ struct workqueue_struct *fm10k_workqueue; **/ static int __init fm10k_init_module(void) { + int ret; + pr_info("%s\n", fm10k_driver_string); pr_info("%s\n", fm10k_copyright); @@ -43,7 +45,13 @@ static int __init fm10k_init_module(void) fm10k_dbg_init(); - return fm10k_register_pci_driver(); + ret = fm10k_register_pci_driver(); + if (ret) { + fm10k_dbg_exit(); + destroy_workqueue(fm10k_workqueue); + } + + return ret; } module_init(fm10k_init_module); From 227d8d2f7f2278b8468c5531b0cd0f2a905b4486 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 14 Nov 2022 08:26:40 +0000 Subject: [PATCH 657/963] iavf: Fix error handling in iavf_init_module() The iavf_init_module() won't destroy workqueue when pci_register_driver() failed. Call destroy_workqueue() when pci_register_driver() failed to prevent the resource leak. Similar to the handling of u132_hcd_init in commit f276e002793c ("usb: u132-hcd: fix resource leak") Fixes: 2803b16c10ea ("i40e/i40evf: Use private workqueue") Signed-off-by: Yuan Can Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d7465296f650..f71e132ede09 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5196,6 +5196,8 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { + int ret; + pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); @@ -5206,7 +5208,12 @@ static int __init iavf_init_module(void) pr_err("%s: Failed to create workqueue\n", iavf_driver_name); return -ENOMEM; } - return pci_register_driver(&iavf_driver); + + ret = pci_register_driver(&iavf_driver); + if (ret) + destroy_workqueue(iavf_wq); + + return ret; } module_init(iavf_init_module); From 45605c75c52c7ae7bfe902214343aabcfe5ba0ff Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 16 Nov 2022 01:24:07 +0800 Subject: [PATCH 658/963] e100: Fix possible use after free in e100_xmit_prepare In e100_xmit_prepare(), if we can't map the skb, then return -ENOMEM, so e100_xmit_frame() will return NETDEV_TX_BUSY and the upper layer will resend the skb. But the skb is already freed, which will cause UAF bug when the upper layer resends the skb. Remove the harmful free. Fixes: 5e5d49422dfb ("e100: Release skb when DMA mapping is failed in e100_xmit_prepare") Signed-off-by: Wang Hai Reviewed-by: Alexander Duyck Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e100.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 560d1d442232..d3fdc290937f 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1741,11 +1741,8 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { - dev_kfree_skb_any(skb); - skb = NULL; + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) return -ENOMEM; - } /* * Use the last 4 bytes of the SKB payload packet as the CRC, used for From 4ba5f0c36cfdda68347269c02961cd90f8443ace Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 23 Nov 2022 17:07:16 +0100 Subject: [PATCH 659/963] s390/dasd: Fix spelling mistake "Ivalid" -> "Invalid" There is a spelling mistake in a pr_warn message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20220923132103.2486724-1-colin.i.king@gmail.com Signed-off-by: Stefan Haberland Link: https://lore.kernel.org/r/20221123160719.3002694-2-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index d0ddf2cc9786..9327dcdd6e5e 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -401,7 +401,7 @@ dasd_ioctl_copy_pair_swap(struct block_device *bdev, void __user *argp) return -EFAULT; } if (memchr_inv(data.reserved, 0, sizeof(data.reserved))) { - pr_warn("%s: Ivalid swap data specified.\n", + pr_warn("%s: Invalid swap data specified\n", dev_name(&device->cdev->dev)); dasd_put_device(device); return DASD_COPYPAIRSWAP_INVALID; From b49e648fcca7e420c4ad670a548e19f0e8531c30 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 23 Nov 2022 17:07:17 +0100 Subject: [PATCH 660/963] s390/dasd: increase printing of debug data payload 32 byte are to less for important data from prefix or other commands. Print up to 128 byte data. This is enough for the largest CCW data we have. Since printk can only print up to 1024 byte at once, print the different parts of the CCW dumps separately. Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20221123160719.3002694-3-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 662730f3b027..85bf045c2ff9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5500,7 +5500,7 @@ dasd_eckd_ioctl(struct dasd_block *block, unsigned int cmd, void __user *argp) * Dump the range of CCWs into 'page' buffer * and return number of printed chars. */ -static int +static void dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) { int len, count; @@ -5518,16 +5518,21 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page) else datap = (char *) ((addr_t) from->cda); - /* dump data (max 32 bytes) */ - for (count = 0; count < from->count && count < 32; count++) { - if (count % 8 == 0) len += sprintf(page + len, " "); - if (count % 4 == 0) len += sprintf(page + len, " "); + /* dump data (max 128 bytes) */ + for (count = 0; count < from->count && count < 128; count++) { + if (count % 32 == 0) + len += sprintf(page + len, "\n"); + if (count % 8 == 0) + len += sprintf(page + len, " "); + if (count % 4 == 0) + len += sprintf(page + len, " "); len += sprintf(page + len, "%02x", datap[count]); } len += sprintf(page + len, "\n"); from++; } - return len; + if (len > 0) + printk(KERN_ERR "%s", page); } static void @@ -5619,37 +5624,33 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, if (req) { /* req == NULL for unsolicited interrupts */ /* dump the Channel Program (max 140 Bytes per line) */ - /* Count CCW and print first CCWs (maximum 1024 % 140 = 7) */ + /* Count CCW and print first CCWs (maximum 7) */ first = req->cpaddr; for (last = first; last->flags & (CCW_FLAG_CC | CCW_FLAG_DC); last++); to = min(first + 6, last); - len = sprintf(page, PRINTK_HEADER - " Related CP in req: %p\n", req); - dasd_eckd_dump_ccw_range(first, to, page + len); - printk(KERN_ERR "%s", page); + printk(KERN_ERR PRINTK_HEADER " Related CP in req: %p\n", req); + dasd_eckd_dump_ccw_range(first, to, page); /* print failing CCW area (maximum 4) */ /* scsw->cda is either valid or zero */ - len = 0; from = ++to; fail = (struct ccw1 *)(addr_t) irb->scsw.cmd.cpa; /* failing CCW */ if (from < fail - 2) { from = fail - 2; /* there is a gap - print header */ - len += sprintf(page, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } to = min(fail + 1, last); - len += dasd_eckd_dump_ccw_range(from, to, page + len); + dasd_eckd_dump_ccw_range(from, to, page + len); /* print last CCWs (maximum 2) */ + len = 0; from = max(from, ++to); if (from < last - 1) { from = last - 1; /* there is a gap - print header */ - len += sprintf(page + len, PRINTK_HEADER "......\n"); + printk(KERN_ERR PRINTK_HEADER "......\n"); } - len += dasd_eckd_dump_ccw_range(from, last, page + len); - if (len > 0) - printk(KERN_ERR "%s", page); + dasd_eckd_dump_ccw_range(from, last, page + len); } free_page((unsigned long) page); } From 590ce6d96d6a224b470a3862c33a483d5022bfdb Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 23 Nov 2022 17:07:18 +0100 Subject: [PATCH 661/963] s390/dasd: fix no record found for raw_track_access For DASD devices in raw_track_access mode only full track images are read and written. For this purpose it is not necessary to do search operation in the locate record extended function. The documentation even states that this might fail if the searched record is not found on a track. Currently the driver sets a value of 1 in the search field for the first record after record zero. This is the default for disks not in raw_track_access mode but record 1 might be missing on a completely empty track. There has not been any problem with this on IBM storage servers but it might lead to errors with DASD devices on other vendors storage servers. Fix this by setting the search field to 0. Record zero is always available even on a completely empty track. Fixes: e4dbb0f2b5dd ("[S390] dasd: Add support for raw ECKD access.") Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20221123160719.3002694-4-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_eckd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 85bf045c2ff9..5d0b9991e91a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -4722,7 +4722,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, struct dasd_device *basedev; struct req_iterator iter; struct dasd_ccw_req *cqr; - unsigned int first_offs; unsigned int trkcount; unsigned long *idaws; unsigned int size; @@ -4756,7 +4755,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) / DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; - first_offs = 0; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -4800,13 +4798,13 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, if (use_prefix) { prefix_LRE(ccw++, data, first_trk, last_trk, cmd, basedev, - startdev, 1, first_offs + 1, trkcount, 0, 0); + startdev, 1, 0, trkcount, 0, 0); } else { define_extent(ccw++, data, first_trk, last_trk, cmd, basedev, 0); ccw[-1].flags |= CCW_FLAG_CC; data += sizeof(struct DE_eckd_data); - locate_record_ext(ccw++, data, first_trk, first_offs + 1, + locate_record_ext(ccw++, data, first_trk, 0, trkcount, cmd, basedev, 0, 0); } From 7e8a05b47ba7200f333eefd19979eeb4d273ceec Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 23 Nov 2022 17:07:19 +0100 Subject: [PATCH 662/963] s390/dasd: fix possible buffer overflow in copy_pair_show dasd_copy_relation->entry[] array might be accessed out of bounds if the loop does not break. Fixes: a91ff09d39f9 ("s390/dasd: add copy pair setup") Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Link: https://lore.kernel.org/r/20221123160719.3002694-5-sth@linux.ibm.com Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index cb83f81da416..df17f0f9cb0f 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1954,7 +1954,7 @@ dasd_copy_pair_show(struct device *dev, break; } } - if (!copy->entry[i].primary) + if (i == DASD_CP_ENTRIES) goto out; /* print all secondary */ From 9f0933ac026f7e54fe096797af9de20724e79097 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 21 Nov 2022 16:31:34 +0000 Subject: [PATCH 663/963] fscache: fix OOB Read in __fscache_acquire_volume The type of a->key[0] is char in fscache_volume_same(). If the length of cache volume key is greater than 127, the value of a->key[0] is less than 0. In this case, klen becomes much larger than 255 after type conversion, because the type of klen is size_t. As a result, memcmp() is read out of bounds. This causes a slab-out-of-bounds Read in __fscache_acquire_volume(), as reported by Syzbot. Fix this by changing the type of the stored key to "u8 *" rather than "char *" (it isn't a simple string anyway). Also put in a check that the volume name doesn't exceed NAME_MAX. BUG: KASAN: slab-out-of-bounds in memcmp+0x16f/0x1c0 lib/string.c:757 Read of size 8 at addr ffff888016f3aa90 by task syz-executor344/3613 Call Trace: memcmp+0x16f/0x1c0 lib/string.c:757 memcmp include/linux/fortify-string.h:420 [inline] fscache_volume_same fs/fscache/volume.c:133 [inline] fscache_hash_volume fs/fscache/volume.c:171 [inline] __fscache_acquire_volume+0x76c/0x1080 fs/fscache/volume.c:328 fscache_acquire_volume include/linux/fscache.h:204 [inline] v9fs_cache_session_get_cookie+0x143/0x240 fs/9p/cache.c:34 v9fs_session_init+0x1166/0x1810 fs/9p/v9fs.c:473 v9fs_mount+0xba/0xc90 fs/9p/vfs_super.c:126 legacy_get_tree+0x105/0x220 fs/fs_context.c:610 vfs_get_tree+0x89/0x2f0 fs/super.c:1530 do_new_mount fs/namespace.c:3040 [inline] path_mount+0x1326/0x1e20 fs/namespace.c:3370 do_mount fs/namespace.c:3383 [inline] __do_sys_mount fs/namespace.c:3591 [inline] __se_sys_mount fs/namespace.c:3568 [inline] __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568 Fixes: 62ab63352350 ("fscache: Implement volume registration") Reported-by: syzbot+a76f6a6e524cf2080aa3@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Zhang Peng Reviewed-by: Jingbo Xu cc: Dominique Martinet cc: Jeff Layton cc: v9fs-developer@lists.sourceforge.net cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/Y3OH+Dmi0QIOK18n@codewreck.org/ # Zhang Peng's v1 fix Link: https://lore.kernel.org/r/20221115140447.2971680-1-zhangpeng362@huawei.com/ # Zhang Peng's v2 fix Link: https://lore.kernel.org/r/166869954095.3793579.8500020902371015443.stgit@warthog.procyon.org.uk/ # v1 Signed-off-by: Linus Torvalds --- fs/fscache/volume.c | 7 +++++-- include/linux/fscache.h | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a058e0136bfe..ab8ceddf9efa 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -203,7 +203,11 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, struct fscache_volume *volume; struct fscache_cache *cache; size_t klen, hlen; - char *key; + u8 *key; + + klen = strlen(volume_key); + if (klen > NAME_MAX) + return NULL; if (!coherency_data) coherency_len = 0; @@ -229,7 +233,6 @@ static struct fscache_volume *fscache_alloc_volume(const char *volume_key, /* Stick the length on the front of the key and pad it out to make * hashing easier. */ - klen = strlen(volume_key); hlen = round_up(1 + klen + 1, sizeof(__le32)); key = kzalloc(hlen, GFP_KERNEL); if (!key) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 36e5dd84cf59..8e312c8323a8 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -75,7 +75,7 @@ struct fscache_volume { atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ - char *key; /* Volume ID, eg. "afs@example.com@1234" */ + u8 *key; /* Volume ID, eg. "afs@example.com@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; From 8ac3b5cd3e0521d92f9755e90d140382fc292510 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 20 Sep 2022 19:06:33 +0200 Subject: [PATCH 664/963] lib/vdso: use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this up by moving the vdso Makefile to use "grep -E" instead. Cc: Andy Lutomirski Cc: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20220920170633.3133829-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- lib/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index c415a685d61b..e814061d6aa0 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -17,6 +17,6 @@ $(error ARCH_REL_TYPE_ABS is not set) endif quiet_cmd_vdso_check = VDSOCHK $@ - cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \ + cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \ then (echo >&2 "$@: dynamic relocations are not supported"; \ rm -f $@; /bin/false); fi From ac8db824ead0de2e9111337c401409d010fba2f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Nov 2022 14:14:32 -0500 Subject: [PATCH 665/963] NFSD: Fix reads with a non-zero offset that don't end on a page boundary This was found when virtual machines with nfs-mounted qcow2 disks failed to boot properly. Reported-by: Anders Blomdell Suggested-by: Al Viro Link: https://bugzilla.redhat.com/show_bug.cgi?id=2142132 Fixes: bfbfb6182ad1 ("nfsd_splice_actor(): handle compound pages") Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 83be89905cbf..31bc7cc82439 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -871,10 +871,11 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct svc_rqst *rqstp = sd->u.data; struct page *page = buf->page; // may be a compound one unsigned offset = buf->offset; + struct page *last_page; - page += offset / PAGE_SIZE; - for (int i = sd->len; i > 0; i -= PAGE_SIZE) - svc_rqst_replace_page(rqstp, page++); + last_page = page + (offset + sd->len - 1) / PAGE_SIZE; + for (page += offset / PAGE_SIZE; page <= last_page; page++) + svc_rqst_replace_page(rqstp, page); if (rqstp->rq_res.page_len == 0) // first call rqstp->rq_res.page_base = offset % PAGE_SIZE; rqstp->rq_res.page_len += sd->len; From 47b0c2e4c220f2251fd8dcfbb44479819c715e15 Mon Sep 17 00:00:00 2001 From: Kazuki Takiguchi Date: Wed, 23 Nov 2022 14:36:00 -0500 Subject: [PATCH 666/963] KVM: x86/mmu: Fix race condition in direct_page_fault make_mmu_pages_available() must be called with mmu_lock held for write. However, if the TDP MMU is used, it will be called with mmu_lock held for read. This function does nothing unless shadow pages are used, so there is no race unless nested TDP is used. Since nested TDP uses shadow pages, old shadow pages may be zapped by this function even when the TDP MMU is enabled. Since shadow pages are never allocated by kvm_tdp_mmu_map(), a race condition can be avoided by not calling make_mmu_pages_available() if the TDP MMU is currently in use. I encountered this when repeatedly starting and stopping nested VM. It can be artificially caused by allocating a large number of nested TDP SPTEs. For example, the following BUG and general protection fault are caused in the host kernel. pte_list_remove: 00000000cd54fc10 many->many ------------[ cut here ]------------ kernel BUG at arch/x86/kvm/mmu/mmu.c:963! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:pte_list_remove.cold+0x16/0x48 [kvm] Call Trace: drop_spte+0xe0/0x180 [kvm] mmu_page_zap_pte+0x4f/0x140 [kvm] __kvm_mmu_prepare_zap_page+0x62/0x3e0 [kvm] kvm_mmu_zap_oldest_mmu_pages+0x7d/0xf0 [kvm] direct_page_fault+0x3cb/0x9b0 [kvm] kvm_tdp_page_fault+0x2c/0xa0 [kvm] kvm_mmu_page_fault+0x207/0x930 [kvm] npf_interception+0x47/0xb0 [kvm_amd] svm_invoke_exit_handler+0x13c/0x1a0 [kvm_amd] svm_handle_exit+0xfc/0x2c0 [kvm_amd] kvm_arch_vcpu_ioctl_run+0xa79/0x1780 [kvm] kvm_vcpu_ioctl+0x29b/0x6f0 [kvm] __x64_sys_ioctl+0x95/0xd0 do_syscall_64+0x5c/0x90 general protection fault, probably for non-canonical address 0xdead000000000122: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:kvm_mmu_commit_zap_page.part.0+0x4b/0xe0 [kvm] Call Trace: kvm_mmu_zap_oldest_mmu_pages+0xae/0xf0 [kvm] direct_page_fault+0x3cb/0x9b0 [kvm] kvm_tdp_page_fault+0x2c/0xa0 [kvm] kvm_mmu_page_fault+0x207/0x930 [kvm] npf_interception+0x47/0xb0 [kvm_amd] CVE: CVE-2022-45869 Fixes: a2855afc7ee8 ("KVM: x86/mmu: Allow parallel page faults for the TDP MMU") Signed-off-by: Kazuki Takiguchi Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1ccb769f62af..b6f96d47e596 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2443,6 +2443,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, { bool list_unstable, zapped_root = false; + lockdep_assert_held_write(&kvm->mmu_lock); trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); @@ -4262,14 +4263,14 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (is_page_fault_stale(vcpu, fault, mmu_seq)) goto out_unlock; - r = make_mmu_pages_available(vcpu); - if (r) - goto out_unlock; - - if (is_tdp_mmu_fault) + if (is_tdp_mmu_fault) { r = kvm_tdp_mmu_map(vcpu, fault); - else + } else { + r = make_mmu_pages_available(vcpu); + if (r) + goto out_unlock; r = __direct_map(vcpu, fault); + } out_unlock: if (is_tdp_mmu_fault) From 4ea9439fd537313f3381f0af4ebbf05e3f51a58c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 12 Nov 2022 13:48:58 +0000 Subject: [PATCH 667/963] KVM: x86/xen: Validate port number in SCHEDOP_poll We shouldn't allow guests to poll on arbitrary port numbers off the end of the event channel table. Fixes: 1a65105a5aba ("KVM: x86/xen: handle PV spinlocks slowpath") [dwmw2: my bug though; the original version did check the validity as a side-effect of an idr_find() which I ripped out in refactoring.] Reported-by: Michal Luczaj Signed-off-by: David Woodhouse Reviewed-by: Sean Christopherson Cc: stable@kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 2dae413bd62a..dc2f304f2e69 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -954,6 +954,14 @@ static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } +static inline int max_evtchn_port(struct kvm *kvm) +{ + if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) + return EVTCHN_2L_NR_CHANNELS; + else + return COMPAT_EVTCHN_2L_NR_CHANNELS; +} + static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { @@ -1042,6 +1050,10 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, *r = -EFAULT; goto out; } + if (ports[i] >= max_evtchn_port(vcpu->kvm)) { + *r = -EINVAL; + goto out; + } } if (sched_poll.nr_ports == 1) @@ -1297,14 +1309,6 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) return 0; } -static inline int max_evtchn_port(struct kvm *kvm) -{ - if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) - return EVTCHN_2L_NR_CHANNELS; - else - return COMPAT_EVTCHN_2L_NR_CHANNELS; -} - static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; From c2b8cdfaf3a6721afe0c8c060a631b1c67a7f1ee Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 12 Nov 2022 13:52:25 +0000 Subject: [PATCH 668/963] KVM: x86/xen: Only do in-kernel acceleration of hypercalls for guest CPL0 There are almost no hypercalls which are valid from CPL > 0, and definitely none which are handled by the kernel. Fixes: 2fd6df2f2b47 ("KVM: x86/xen: intercept EVTCHNOP_send from guests") Reported-by: Michal Luczaj Signed-off-by: David Woodhouse Reviewed-by: Sean Christopherson Cc: stable@kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/xen.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index dc2f304f2e69..f3098c0e386a 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -1227,6 +1227,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; + u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -1254,9 +1255,17 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) params[5] = (u64)kvm_r9_read(vcpu); } #endif + cpl = static_call(kvm_x86_get_cpl)(vcpu); trace_kvm_xen_hypercall(input, params[0], params[1], params[2], params[3], params[4], params[5]); + /* + * Only allow hypercall acceleration for CPL0. The rare hypercalls that + * are permitted in guest userspace can be handled by the VMM. + */ + if (unlikely(cpl > 0)) + goto handle_in_userspace; + switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { @@ -1291,10 +1300,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) if (handled) return kvm_xen_hypercall_set_result(vcpu, r); +handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; - vcpu->run->xen.u.hcall.cpl = static_call(kvm_x86_get_cpl)(vcpu); + vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; From 8332f0ed4f187c7b700831bd7cc83ce180a944b9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Nov 2022 09:25:39 +0000 Subject: [PATCH 669/963] KVM: Update gfn_to_pfn_cache khva when it moves within the same page In the case where a GPC is refreshed to a different location within the same page, we didn't bother to update it. Mostly we don't need to, but since the ->khva field also includes the offset within the page, that does have to be updated. Fixes: 3ba2c95ea180 ("KVM: Do not incorporate page offset into gfn=>pfn cache user address") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Reviewed-by: Sean Christopherson Cc: stable@kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/pfncache.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index 346e47f15572..7c248193ca26 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -297,7 +297,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, if (!gpc->valid || old_uhva != gpc->uhva) { ret = hva_to_pfn_retry(kvm, gpc); } else { - /* If the HVA→PFN mapping was already valid, don't unmap it. */ + /* + * If the HVA→PFN mapping was already valid, don't unmap it. + * But do update gpc->khva because the offset within the page + * may have changed. + */ + gpc->khva = old_khva + page_offset; old_pfn = KVM_PFN_ERR_FAULT; old_khva = NULL; ret = 0; From ef38c79a522b660f7f71d45dad2d6244bc741841 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 23 Nov 2022 16:43:23 -0500 Subject: [PATCH 670/963] tracing: Fix race where histograms can be called before the event commit 94eedf3dded5 ("tracing: Fix race where eprobes can be called before the event") fixed an issue where if an event is soft disabled, and the trigger is being added, there's a small window where the event sees that there's a trigger but does not see that it requires reading the event yet, and then calls the trigger with the record == NULL. This could be solved with adding memory barriers in the hot path, or to make sure that all the triggers requiring a record check for NULL. The latter was chosen. Commit 94eedf3dded5 set the eprobe trigger handle to check for NULL, but the same needs to be done with histograms. Link: https://lore.kernel.org/linux-trace-kernel/20221118211809.701d40c0f8a757b0df3c025a@kernel.org/ Link: https://lore.kernel.org/linux-trace-kernel/20221123164323.03450c3a@gandalf.local.home Cc: Tom Zanussi Cc: stable@vger.kernel.org Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Reported-by: Masami Hiramatsu (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 087c19548049..1c82478e8dff 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5143,6 +5143,9 @@ static void event_hist_trigger(struct event_trigger_data *data, void *key = NULL; unsigned int i; + if (unlikely(!rbe)) + return; + memset(compound_key, 0, hist_data->key_size); for_each_hist_key_field(i, hist_data) { From e18eb8783ec4949adebc7d7b0fdb65f65bfeefd9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 23 Nov 2022 14:25:57 -0500 Subject: [PATCH 671/963] tracing: Add tracing_reset_all_online_cpus_unlocked() function Currently the tracing_reset_all_online_cpus() requires the trace_types_lock held. But only one caller of this function actually has that lock held before calling it, and the other just takes the lock so that it can call it. More users of this function is needed where the lock is not held. Add a tracing_reset_all_online_cpus_unlocked() function for the one use case that calls it without being held, and also add a lockdep_assert to make sure it is held when called. Then have tracing_reset_all_online_cpus() take the lock internally, such that callers do not need to worry about taking it. Link: https://lkml.kernel.org/r/20221123192741.658273220@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Zheng Yejian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 11 ++++++++++- kernel/trace/trace.h | 1 + kernel/trace/trace_events.c | 2 +- kernel/trace/trace_events_synth.c | 2 -- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fe0e115272..5cfc95a52bc3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2180,10 +2180,12 @@ void tracing_reset_online_cpus(struct array_buffer *buf) } /* Must have trace_types_lock held */ -void tracing_reset_all_online_cpus(void) +void tracing_reset_all_online_cpus_unlocked(void) { struct trace_array *tr; + lockdep_assert_held(&trace_types_lock); + list_for_each_entry(tr, &ftrace_trace_arrays, list) { if (!tr->clear_trace) continue; @@ -2195,6 +2197,13 @@ void tracing_reset_all_online_cpus(void) } } +void tracing_reset_all_online_cpus(void) +{ + mutex_lock(&trace_types_lock); + tracing_reset_all_online_cpus_unlocked(); + mutex_unlock(&trace_types_lock); +} + /* * The tgid_map array maps from pid to tgid; i.e. the value stored at index i * is the tgid last observed corresponding to pid=i. diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54ee5711c729..d42e24507152 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -580,6 +580,7 @@ int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); void tracing_reset_current(int cpu); void tracing_reset_all_online_cpus(void); +void tracing_reset_all_online_cpus_unlocked(void); int tracing_open_generic(struct inode *inode, struct file *filp); int tracing_open_generic_tr(struct inode *inode, struct file *filp); bool tracing_is_disabled(void); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0356cae0cf74..78cd19e31dba 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2972,7 +2972,7 @@ static void trace_module_remove_events(struct module *mod) * over from this module may be passed to the new module events and * unexpected results may occur. */ - tracing_reset_all_online_cpus(); + tracing_reset_all_online_cpus_unlocked(); } static int trace_module_notify(struct notifier_block *self, diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 29fbfb27c2b2..c3b582d19b62 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -1425,7 +1425,6 @@ int synth_event_delete(const char *event_name) mutex_unlock(&event_mutex); if (mod) { - mutex_lock(&trace_types_lock); /* * It is safest to reset the ring buffer if the module * being unloaded registered any events that were @@ -1437,7 +1436,6 @@ int synth_event_delete(const char *event_name) * occur. */ tracing_reset_all_online_cpus(); - mutex_unlock(&trace_types_lock); } return ret; From 4313e5a613049dfc1819a6dfb5f94cf2caff9452 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 23 Nov 2022 17:14:34 -0500 Subject: [PATCH 672/963] tracing: Free buffers when a used dynamic event is removed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After 65536 dynamic events have been added and removed, the "type" field of the event then uses the first type number that is available (not currently used by other events). A type number is the identifier of the binary blobs in the tracing ring buffer (known as events) to map them to logic that can parse the binary blob. The issue is that if a dynamic event (like a kprobe event) is traced and is in the ring buffer, and then that event is removed (because it is dynamic, which means it can be created and destroyed), if another dynamic event is created that has the same number that new event's logic on parsing the binary blob will be used. To show how this can be an issue, the following can crash the kernel: # cd /sys/kernel/tracing # for i in `seq 65536`; do echo 'p:kprobes/foo do_sys_openat2 $arg1:u32' > kprobe_events # done For every iteration of the above, the writing to the kprobe_events will remove the old event and create a new one (with the same format) and increase the type number to the next available on until the type number reaches over 65535 which is the max number for the 16 bit type. After it reaches that number, the logic to allocate a new number simply looks for the next available number. When an dynamic event is removed, that number is then available to be reused by the next dynamic event created. That is, once the above reaches the max number, the number assigned to the event in that loop will remain the same. Now that means deleting one dynamic event and created another will reuse the previous events type number. This is where bad things can happen. After the above loop finishes, the kprobes/foo event which reads the do_sys_openat2 function call's first parameter as an integer. # echo 1 > kprobes/foo/enable # cat /etc/passwd > /dev/null # cat trace cat-2211 [005] .... 2007.849603: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849620: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849838: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 cat-2211 [005] .... 2007.849880: foo: (do_sys_openat2+0x0/0x130) arg1=4294967196 # echo 0 > kprobes/foo/enable Now if we delete the kprobe and create a new one that reads a string: # echo 'p:kprobes/foo do_sys_openat2 +0($arg2):string' > kprobe_events And now we can the trace: # cat trace sendmail-1942 [002] ..... 530.136320: foo: (do_sys_openat2+0x0/0x240) arg1= cat-2046 [004] ..... 530.930817: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.930961: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.934278: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" cat-2046 [004] ..... 530.934563: foo: (do_sys_openat2+0x0/0x240) arg1="������������������������������������������������������������������������������������������������" bash-1515 [007] ..... 534.299093: foo: (do_sys_openat2+0x0/0x240) arg1="kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk���������@��4Z����;Y�����U And dmesg has: ================================================================== BUG: KASAN: use-after-free in string+0xd4/0x1c0 Read of size 1 at addr ffff88805fdbbfa0 by task cat/2049 CPU: 0 PID: 2049 Comm: cat Not tainted 6.1.0-rc6-test+ #641 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 Call Trace: dump_stack_lvl+0x5b/0x77 print_report+0x17f/0x47b kasan_report+0xad/0x130 string+0xd4/0x1c0 vsnprintf+0x500/0x840 seq_buf_vprintf+0x62/0xc0 trace_seq_printf+0x10e/0x1e0 print_type_string+0x90/0xa0 print_kprobe_event+0x16b/0x290 print_trace_line+0x451/0x8e0 s_show+0x72/0x1f0 seq_read_iter+0x58e/0x750 seq_read+0x115/0x160 vfs_read+0x11d/0x460 ksys_read+0xa9/0x130 do_syscall_64+0x3a/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fc2e972ade2 Code: c0 e9 b2 fe ff ff 50 48 8d 3d b2 3f 0a 00 e8 05 f0 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007ffc64e687c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007fc2e972ade2 RDX: 0000000000020000 RSI: 00007fc2e980d000 RDI: 0000000000000003 RBP: 00007fc2e980d000 R08: 00007fc2e980c010 R09: 0000000000000000 R10: 0000000000000022 R11: 0000000000000246 R12: 0000000000020f00 R13: 0000000000000003 R14: 0000000000020000 R15: 0000000000020000 The buggy address belongs to the physical page: page:ffffea00017f6ec0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x5fdbb flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) raw: 000fffffc0000000 0000000000000000 ffffea00017f6ec8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88805fdbbe80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88805fdbbf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88805fdbbf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88805fdbc000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88805fdbc080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== This was found when Zheng Yejian sent a patch to convert the event type number assignment to use IDA, which gives the next available number, and this bug showed up in the fuzz testing by Yujie Liu and the kernel test robot. But after further analysis, I found that this behavior is the same as when the event type numbers go past the 16bit max (and the above shows that). As modules have a similar issue, but is dealt with by setting a "WAS_ENABLED" flag when a module event is enabled, and when the module is freed, if any of its events were enabled, the ring buffer that holds that event is also cleared, to prevent reading stale events. The same can be done for dynamic events. If any dynamic event that is being removed was enabled, then make sure the buffers they were enabled in are now cleared. Link: https://lkml.kernel.org/r/20221123171434.545706e3@gandalf.local.home Link: https://lore.kernel.org/all/20221110020319.1259291-1-zhengyejian1@huawei.com/ Cc: stable@vger.kernel.org Cc: Andrew Morton Depends-on: e18eb8783ec49 ("tracing: Add tracing_reset_all_online_cpus_unlocked() function") Depends-on: 5448d44c38557 ("tracing: Add unified dynamic event framework") Depends-on: 6212dd29683ee ("tracing/kprobes: Use dyn_event framework for kprobe events") Depends-on: 065e63f951432 ("tracing: Only have rmmod clear buffers that its events were active in") Depends-on: 575380da8b469 ("tracing: Only clear trace buffer on module unload if event was traced") Fixes: 77b44d1b7c283 ("tracing/kprobes: Rename Kprobe-tracer to kprobe-event") Reported-by: Zheng Yejian Reported-by: Yujie Liu Reported-by: kernel test robot Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_dynevent.c | 2 ++ kernel/trace/trace_events.c | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 154996684fb5..4376887e0d8a 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -118,6 +118,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type if (ret) break; } + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); out: argv_free(argv); @@ -214,6 +215,7 @@ int dyn_events_release_all(struct dyn_event_operations *type) break; } out: + tracing_reset_all_online_cpus(); mutex_unlock(&event_mutex); return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 78cd19e31dba..f71ea6e79b3c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2880,7 +2880,10 @@ static int probe_remove_event_call(struct trace_event_call *call) * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) - return -EBUSY; + goto busy; + + if (file->flags & EVENT_FILE_FL_WAS_ENABLED) + tr->clear_trace = true; /* * The do_for_each_event_file_safe() is * a double loop. After finding the call for this @@ -2893,6 +2896,12 @@ static int probe_remove_event_call(struct trace_event_call *call) __trace_remove_event_call(call); return 0; + busy: + /* No need to clear the trace now */ + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + tr->clear_trace = false; + } + return -EBUSY; } /* Remove an event_call */ From 083cad78042e9eb6d2d727ff561dc6eea36ac9a0 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 22 Nov 2022 23:39:02 +0900 Subject: [PATCH 673/963] kbuild: fix "cat: .version: No such file or directory" Since commit 2df8220cc511 ("kbuild: build init/built-in.a just once"), the .version file is not touched at all when KBUILD_BUILD_VERSION is given. If KBUILD_BUILD_VERSION is specified and the .version file is missing (for example right after 'make mrproper'), "No such file or director" is shown. Even if the .version exists, it is irrelevant to the version of the current build. $ make -j$(nproc) KBUILD_BUILD_VERSION=100 mrproper defconfig all [ snip ] BUILD arch/x86/boot/bzImage cat: .version: No such file or directory Kernel: arch/x86/boot/bzImage is ready (#) Show KBUILD_BUILD_VERSION if it is given. Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once") Signed-off-by: Masahiro Yamada Reviewed-by: Nicolas Schier --- arch/microblaze/Makefile | 4 ++-- arch/x86/boot/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 3f8a86c4336a..02e6be9c5b0d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -67,12 +67,12 @@ linux.bin.ub linux.bin.gz: linux.bin linux.bin: vmlinux linux.bin linux.bin.gz linux.bin.ub: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' PHONY += simpleImage.$(DTB) simpleImage.$(DTB): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(addprefix $(boot)/$@., ub unstrip strip) - @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' + @echo 'Kernel: $(boot)/$@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' define archhelp echo '* linux.bin - Create raw binary' diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 9860ca5979f8..9e38ffaadb5d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -83,7 +83,7 @@ cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ $(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE $(call if_changed,image) - @$(kecho) 'Kernel: $@ is ready' ' (#'`cat .version`')' + @$(kecho) 'Kernel: $@ is ready' ' (#'$(or $(KBUILD_BUILD_VERSION),`cat .version`)')' OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE From df1f1ea9569eef05c006aeae1d65fbf6c2339677 Mon Sep 17 00:00:00 2001 From: Paran Lee Date: Thu, 24 Nov 2022 07:00:44 +0900 Subject: [PATCH 674/963] scripts: add rust in scripts/Makefile.package Add rust argument at TAR_CONTENT in scripts/Makefile.package script with alphabetical order. Signed-off-by: Paran Lee Signed-off-by: Masahiro Yamada --- scripts/Makefile.package | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 8bbcced67c22..2a90139ecbe1 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -30,8 +30,8 @@ KBUILD_PKG_ROOTCMD ?="fakeroot -u" export KDEB_SOURCENAME # Include only those top-level files that are needed by make, plus the GPL copy TAR_CONTENT := Documentation LICENSES arch block certs crypto drivers fs \ - include init io_uring ipc kernel lib mm net samples scripts \ - security sound tools usr virt \ + include init io_uring ipc kernel lib mm net rust \ + samples scripts security sound tools usr virt \ .config .scmversion Makefile \ Kbuild Kconfig COPYING $(wildcard localversion*) MKSPEC := $(srctree)/scripts/package/mkspec From b6e7c196ac2f90f08955ca4db568a52160876407 Mon Sep 17 00:00:00 2001 From: Nir Levy Date: Mon, 21 Nov 2022 00:06:30 +0200 Subject: [PATCH 675/963] Documentation: networking: Update generic_netlink_howto URL The documentation refers to invalid web page under www.linuxfoundation.org The patch refers to a working URL under wiki.linuxfoundation.org Signed-off-by: Nir Levy Link: https://lore.kernel.org/all/20221120220630.7443-1-bhr166@gmail.com/ Signed-off-by: Jakub Kicinski --- Documentation/networking/generic_netlink.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/generic_netlink.rst b/Documentation/networking/generic_netlink.rst index 59e04ccf80c1..d960dbd7e80e 100644 --- a/Documentation/networking/generic_netlink.rst +++ b/Documentation/networking/generic_netlink.rst @@ -6,4 +6,4 @@ Generic Netlink A wiki document on how to use Generic Netlink can be found here: - * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto + * https://wiki.linuxfoundation.org/networking/generic_netlink_howto From 7d4a93176e0142ce16d23c849a47d5b00b856296 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 21 Nov 2022 23:56:45 +0800 Subject: [PATCH 676/963] ublk_drv: don't forward io commands in reserve order Either ublk_can_use_task_work() is true or not, io commands are forwarded to ublk server in reverse order, since llist_add() is always to add one element to the head of the list. Even though block layer doesn't guarantee request dispatch order, requests should be sent to hardware in the sequence order generated from io scheduler, which usually considers the request's LBA, and order is often important for HDD. So forward io commands in the sequence made from io scheduler by aligning task work with current io_uring command's batch handling, and it has been observed that both can get similar performance data if IORING_SETUP_COOP_TASKRUN is set from ublk server. Reported-by: Andreas Hindborg Cc: Damien Le Moal Signed-off-by: Ming Lei Reviewed-by: Damien Le Moal Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20221121155645.396272-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 82 +++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 44 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f96cb01e9604..e9de9d846b73 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -57,10 +57,8 @@ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) struct ublk_rq_data { - union { - struct callback_head work; - struct llist_node node; - }; + struct llist_node node; + struct callback_head work; }; struct ublk_uring_cmd_pdu { @@ -766,15 +764,31 @@ static inline void __ublk_rq_task_work(struct request *req) ubq_complete_io_cmd(io, UBLK_IO_RES_OK); } +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + io_cmds = llist_reverse_order(io_cmds); + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); +} + +static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) +{ + struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); + struct ublk_rq_data *data, *tmp; + + llist_for_each_entry_safe(data, tmp, io_cmds, node) + __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); +} + static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - llist_for_each_entry(data, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + ublk_forward_io_cmds(ubq); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -782,14 +796,20 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data *data = container_of(work, struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); + struct ublk_queue *ubq = req->mq_hctx->driver_data; - __ublk_rq_task_work(req); + ublk_forward_io_cmds(ubq); } -static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) +static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) { - struct ublk_io *io = &ubq->ios[rq->tag]; + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + struct ublk_io *io; + if (!llist_add(&data->node, &ubq->io_cmds)) + return; + + io = &ubq->ios[rq->tag]; /* * If the check pass, we know that this is a re-issued request aborted * previously in monitor_work because the ubq_daemon(cmd's task) is @@ -803,11 +823,11 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) * guarantees that here is a re-issued request aborted previously. */ if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) { - struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); - struct ublk_rq_data *data; - - llist_for_each_entry(data, io_cmds, node) - __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); + ublk_abort_io_cmds(ubq); + } else if (ublk_can_use_task_work(ubq)) { + if (task_work_add(ubq->ubq_daemon, &data->work, + TWA_SIGNAL_NO_IPI)) + ublk_abort_io_cmds(ubq); } else { struct io_uring_cmd *cmd = io->cmd; struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); @@ -817,23 +837,6 @@ static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq) } } -static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq, - bool last) -{ - struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); - - if (ublk_can_use_task_work(ubq)) { - enum task_work_notify_mode notify_mode = last ? - TWA_SIGNAL_NO_IPI : TWA_NONE; - - if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) - __ublk_abort_rq(ubq, rq); - } else { - if (llist_add(&data->node, &ubq->io_cmds)) - ublk_submit_cmd(ubq, rq); - } -} - static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -865,19 +868,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } - ublk_queue_cmd(ubq, rq, bd->last); + ublk_queue_cmd(ubq, rq); return BLK_STS_OK; } -static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx) -{ - struct ublk_queue *ubq = hctx->driver_data; - - if (ublk_can_use_task_work(ubq)) - __set_notify_signal(ubq->ubq_daemon); -} - static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { @@ -899,7 +894,6 @@ static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req, static const struct blk_mq_ops ublk_mq_ops = { .queue_rq = ublk_queue_rq, - .commit_rqs = ublk_commit_rqs, .init_hctx = ublk_init_hctx, .init_request = ublk_init_rq, }; @@ -1197,7 +1191,7 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); - ublk_queue_cmd(ubq, req, true); + ublk_queue_cmd(ubq, req); } static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) From c60c152230828825c06e62a8f1ce956d4b659266 Mon Sep 17 00:00:00 2001 From: Martin Faltesek Date: Mon, 21 Nov 2022 18:42:44 -0600 Subject: [PATCH 677/963] nfc: st-nci: fix incorrect validating logic in EVT_TRANSACTION The first validation check for EVT_TRANSACTION has two different checks tied together with logical AND. One is a check for minimum packet length, and the other is for a valid aid_tag. If either condition is true (fails), then an error should be triggered. The fix is to change && to ||. Reported-by: Denis Efremov Reviewed-by: Guenter Roeck Fixes: 5d1ceb7f5e56 ("NFC: st21nfcb: Add HCI transaction event support") Signed-off-by: Martin Faltesek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/st-nci/se.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 7764b1a4c3cf..589e1dec78e7 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -326,7 +326,7 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * AID 81 5 to 16 * PARAMETERS 82 0 to 255 */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 && + if (skb->len < NFC_MIN_AID_LENGTH + 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; From 440f2ae9c9f06e26f5dcea697a53717fc61a318c Mon Sep 17 00:00:00 2001 From: Martin Faltesek Date: Mon, 21 Nov 2022 18:42:45 -0600 Subject: [PATCH 678/963] nfc: st-nci: fix memory leaks in EVT_TRANSACTION Error path does not free previously allocated memory. Add devm_kfree() to the failure path. Reported-by: Denis Efremov Reviewed-by: Guenter Roeck Fixes: 5d1ceb7f5e56 ("NFC: st21nfcb: Add HCI transaction event support") Signed-off-by: Martin Faltesek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/st-nci/se.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index 589e1dec78e7..fc59916ae5ae 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -339,8 +339,10 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, /* Check next byte is PARAMETERS tag (82) */ if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) + NFC_EVT_TRANSACTION_PARAMS_TAG) { + devm_kfree(dev, transaction); return -EPROTO; + } transaction->params_len = skb->data[transaction->aid_len + 3]; memcpy(transaction->params, skb->data + From 0254f31a7df3bb3b90c2d9dd2d4052f7b95eb287 Mon Sep 17 00:00:00 2001 From: Martin Faltesek Date: Mon, 21 Nov 2022 18:42:46 -0600 Subject: [PATCH 679/963] nfc: st-nci: fix incorrect sizing calculations in EVT_TRANSACTION The transaction buffer is allocated by using the size of the packet buf, and subtracting two which seems intended to remove the two tags which are not present in the target structure. This calculation leads to under counting memory because of differences between the packet contents and the target structure. The aid_len field is a u8 in the packet, but a u32 in the structure, resulting in at least 3 bytes always being under counted. Further, the aid data is a variable length field in the packet, but fixed in the structure, so if this field is less than the max, the difference is added to the under counting. To fix, perform validation checks progressively to safely reach the next field, to determine the size of both buffers and verify both tags. Once all validation checks pass, allocate the buffer and copy the data. This eliminates freeing memory on the error path, as validation checks are moved ahead of memory allocation. Reported-by: Denis Efremov Reviewed-by: Guenter Roeck Fixes: 5d1ceb7f5e56 ("NFC: st21nfcb: Add HCI transaction event support") Signed-off-by: Martin Faltesek Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski --- drivers/nfc/st-nci/se.c | 51 +++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/drivers/nfc/st-nci/se.c b/drivers/nfc/st-nci/se.c index fc59916ae5ae..ec87dd21e054 100644 --- a/drivers/nfc/st-nci/se.c +++ b/drivers/nfc/st-nci/se.c @@ -312,6 +312,8 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, int r = 0; struct device *dev = &ndev->nfc_dev->dev; struct nfc_evt_transaction *transaction; + u32 aid_len; + u8 params_len; pr_debug("connectivity gate event: %x\n", event); @@ -325,28 +327,47 @@ static int st_nci_hci_connectivity_event_received(struct nci_dev *ndev, * Description Tag Length * AID 81 5 to 16 * PARAMETERS 82 0 to 255 + * + * The key differences are aid storage length is variably sized + * in the packet, but fixed in nfc_evt_transaction, and that + * the aid_len is u8 in the packet, but u32 in the structure, + * and the tags in the packet are not included in + * nfc_evt_transaction. + * + * size(b): 1 1 5-16 1 1 0-255 + * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4 + * mem name: aid_tag(M) aid_len aid params_tag(M) params_len params + * example: 0x81 5-16 X 0x82 0-255 X */ - if (skb->len < NFC_MIN_AID_LENGTH + 2 || - skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) + if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG) return -EPROTO; - transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL); + aid_len = skb->data[1]; + + if (skb->len < aid_len + 4 || + aid_len > sizeof(transaction->aid)) + return -EPROTO; + + params_len = skb->data[aid_len + 3]; + + /* Verify PARAMETERS tag is (82), and final check that there is + * enough space in the packet to read everything. + */ + if (skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG || + skb->len < aid_len + 4 + params_len) + return -EPROTO; + + transaction = devm_kzalloc(dev, sizeof(*transaction) + + params_len, GFP_KERNEL); if (!transaction) return -ENOMEM; - transaction->aid_len = skb->data[1]; - memcpy(transaction->aid, &skb->data[2], transaction->aid_len); + transaction->aid_len = aid_len; + transaction->params_len = params_len; - /* Check next byte is PARAMETERS tag (82) */ - if (skb->data[transaction->aid_len + 2] != - NFC_EVT_TRANSACTION_PARAMS_TAG) { - devm_kfree(dev, transaction); - return -EPROTO; - } - - transaction->params_len = skb->data[transaction->aid_len + 3]; - memcpy(transaction->params, skb->data + - transaction->aid_len + 4, transaction->params_len); + memcpy(transaction->aid, &skb->data[2], aid_len); + memcpy(transaction->params, &skb->data[aid_len + 4], + params_len); r = nfc_se_transaction(ndev->nfc_dev, host, transaction); break; From 9a234a2a085ab9fd2be8d0c1eedfcd10f74b97eb Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Tue, 22 Nov 2022 19:10:31 +0800 Subject: [PATCH 680/963] net: marvell: prestera: add missing unregister_netdev() in prestera_port_create() If prestera_port_sfp_bind() fails, unregister_netdev() should be called in error handling path. Compile tested only. Fixes: 52323ef75414 ("net: marvell: prestera: add phylink support") Signed-off-by: Zhang Changzhong Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/1669115432-36841-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 24f9d6024745..47796e4d900c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -746,6 +746,7 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) return 0; err_sfp_bind: + unregister_netdev(dev); err_register_netdev: prestera_port_list_del(port); err_port_init: From 290b5fe096e7dd0aad730d1af4f7f2d9fea43e11 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 22 Nov 2022 15:09:36 +0200 Subject: [PATCH 681/963] net: enetc: preserve TX ring priority across reconfiguration In the blamed commit, a rudimentary reallocation procedure for RX buffer descriptors was implemented, for the situation when their format changes between normal (no PTP) and extended (PTP). enetc_hwtstamp_set() calls enetc_close() and enetc_open() in a sequence, and this sequence loses information which was previously configured in the TX BDR Mode Register, specifically via the enetc_set_bdr_prio() call. The TX ring priority is configured by tc-mqprio and tc-taprio, and affects important things for TSN such as the TX time of packets. The issue manifests itself most visibly by the fact that isochron --txtime reports premature packet transmissions when PTP is first enabled on an enetc interface. Save the TX ring priority in a new field in struct enetc_bdr (occupies a 2 byte hole on arm64) in order to make this survive a ring reconfiguration. Fixes: 434cebabd3a2 ("enetc: Add dynamic allocation of extended Rx BD rings") Signed-off-by: Vladimir Oltean Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20221122130936.1704151-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++++--- drivers/net/ethernet/freescale/enetc/enetc.h | 1 + .../net/ethernet/freescale/enetc/enetc_qos.c | 21 ++++++++++++------- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index f8c06c3f9464..8671591cb750 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2058,7 +2058,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring) /* enable Tx ints by setting pkt thr to 1 */ enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1); - tbmr = ENETC_TBMR_EN; + tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio); if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX) tbmr |= ENETC_TBMR_VIH; @@ -2461,7 +2461,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) /* Reset all ring priorities to 0 */ for (i = 0; i < priv->num_tx_rings; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, 0); + tx_ring->prio = 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } return 0; @@ -2480,7 +2481,8 @@ int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) */ for (i = 0; i < num_tc; i++) { tx_ring = priv->tx_ring[i]; - enetc_set_bdr_prio(hw, tx_ring->index, i); + tx_ring->prio = i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); } /* Reset the number of netdev queues based on the TC count */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 161930a65f61..c6d8cc15c270 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -95,6 +95,7 @@ struct enetc_bdr { void __iomem *rcir; }; u16 index; + u16 prio; int bd_count; /* # of BDs */ int next_to_use; int next_to_clean; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index a842e1999122..fcebb54224c0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -137,6 +137,7 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; + struct enetc_bdr *tx_ring; int err; int i; @@ -145,16 +146,20 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) if (priv->tx_ring[i]->tsd_enable) return -EBUSY; - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? i : 0); + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? i : 0; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } err = enetc_setup_taprio(ndev, taprio); - - if (err) - for (i = 0; i < priv->num_tx_rings; i++) - enetc_set_bdr_prio(hw, priv->tx_ring[i]->index, - taprio->enable ? 0 : i); + if (err) { + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + tx_ring->prio = taprio->enable ? 0 : i; + enetc_set_bdr_prio(hw, tx_ring->index, tx_ring->prio); + } + } return err; } From cd07eadd5147ffdae11b6fd28b77a3872f2a2484 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 13:54:49 +0800 Subject: [PATCH 682/963] octeontx2-pf: Add check for devm_kcalloc As the devm_kcalloc may return NULL pointer, it should be better to add check for the return value, as same as the others. Fixes: e8e095b3b370 ("octeontx2-af: cn10k: Bandwidth profiles config support") Signed-off-by: Jiasheng Jiang Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20221122055449.31247-1-jiasheng@iscas.ac.cn Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 7646bb2ec89b..a62c1b322012 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4985,6 +4985,8 @@ static int nix_setup_ipolicers(struct rvu *rvu, ipolicer->ref_count = devm_kcalloc(rvu->dev, ipolicer->band_prof.max, sizeof(u16), GFP_KERNEL); + if (!ipolicer->ref_count) + return -ENOMEM; } /* Set policer timeunit to 2us ie (19 + 1) * 100 nsec = 2us */ From 08e8a949f684e1fbc4b1efd2337d72ec8f3613d9 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 22 Nov 2022 20:19:40 +0800 Subject: [PATCH 683/963] net: wwan: t7xx: Fix the ACPI memory leak The ACPI buffer memory (buffer.pointer) should be freed as the buffer is not used after acpi_evaluate_object(), free it to prevent memory leak. Fixes: 13e920d93e37 ("net: wwan: t7xx: Add core components") Signed-off-by: Hanjun Guo Link: https://lore.kernel.org/r/1669119580-28977-1-git-send-email-guohanjun@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/wwan/t7xx/t7xx_modem_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c index 3458af31e864..7d0f5e4f0a78 100644 --- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c +++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c @@ -165,6 +165,8 @@ static int t7xx_acpi_reset(struct t7xx_pci_dev *t7xx_dev, char *fn_name) return -EFAULT; } + kfree(buffer.pointer); + #endif return 0; } From 52f7cf70eb8fac6111786c59ae9dfc5cf2bee710 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 10 Nov 2022 21:47:07 +0800 Subject: [PATCH 684/963] net/mlx5: DR, Fix uninitialized var warning Smatch warns this: drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c:81 mlx5dr_table_set_miss_action() error: uninitialized symbol 'ret'. Initializing ret with -EOPNOTSUPP and fix missing action case. Fixes: 7838e1725394 ("net/mlx5: DR, Expose steering table functionality") Signed-off-by: YueHaibing Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c index 31d443dd8386..f68461b13391 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c @@ -46,7 +46,7 @@ static int dr_table_set_miss_action_nic(struct mlx5dr_domain *dmn, int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, struct mlx5dr_action *action) { - int ret; + int ret = -EOPNOTSUPP; if (action && action->action_type != DR_ACTION_TYP_FT) return -EOPNOTSUPP; @@ -67,6 +67,9 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl, goto out; } + if (ret) + goto out; + /* Release old action */ if (tbl->miss_action) refcount_dec(&tbl->miss_action->refcount); From 2318b8bb94a3a21363cd0d49cad5934bd1e2d60e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 17 Nov 2022 09:48:13 +0200 Subject: [PATCH 685/963] net/mlx5: E-switch, Destroy legacy fdb table when needed The cited commit removes eswitch mode none. But when disabling sriov in legacy mode or changing from switchdev to legacy mode without sriov enabled, the legacy fdb table is not destroyed. It is not the right behavior. Destroy legacy fdb table in above two caes. Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Eli Cohen Reviewed-by: Mark Bloch Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2169486c4bfb..374e3fbdc2cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1362,6 +1362,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) devl_rate_nodes_destroy(devlink); } + /* Destroy legacy fdb when disabling sriov in legacy mode. */ + if (esw->mode == MLX5_ESWITCH_LEGACY) + mlx5_eswitch_disable_locked(esw); esw->esw_funcs.num_vfs = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3fda75fe168c..8c6c9bcb3dc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3387,6 +3387,13 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, int err; esw->mode = MLX5_ESWITCH_LEGACY; + + /* If changing from switchdev to legacy mode without sriov enabled, + * no need to create legacy fdb. + */ + if (!mlx5_sriov_is_enabled(esw->dev)) + return 0; + err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); From e87c6a832f889c093c055a30a7b8c6843e6573bf Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 4 Aug 2022 05:09:07 +0300 Subject: [PATCH 686/963] net/mlx5: E-switch, Fix duplicate lag creation If creating bond first and then enabling sriov in switchdev mode, will hit the following syndrome: mlx5_core 0000:08:00.0: mlx5_cmd_out_err:778:(pid 25543): CREATE_LAG(0x840) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x7d49cb), err(-22) The reason is because the offending patch removes eswitch mode none. In vf lag, the checking of eswitch mode none is replaced by checking if sriov is enabled. But when driver enables sriov, it triggers the bond workqueue task first and then setting sriov number in pci_enable_sriov(). So the check fails. Fix it by checking if sriov is enabled using eswitch internal counter that is set before triggering the bond workqueue task. Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 ++++++++ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f68dc2d0dbe6..3029bc1c0dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -736,6 +736,14 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); +static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->esw_funcs.num_vfs; + + return 0; +} + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index be1307a63e6d..4070dc1d17cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -701,8 +701,9 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) #ifdef CONFIG_MLX5_ESWITCH dev = ldev->pf[MLX5_LAG_P1].dev; - if ((mlx5_sriov_is_enabled(dev)) && !is_mdev_switchdev_mode(dev)) - return false; + for (i = 0; i < ldev->ports; i++) + if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) + return false; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) From 3f5769a074c13d8f08455e40586600419e02a880 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 21 Nov 2022 19:22:04 +0800 Subject: [PATCH 687/963] net/mlx5: Fix uninitialized variable bug in outlen_write() If sscanf() return 0, outlen is uninitialized and used in kzalloc(), this is unexpected. We should return -EINVAL if the string is invalid. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: YueHaibing Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 74bd05e5dda2..e7a894ba5c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1497,8 +1497,8 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, return -EFAULT; err = sscanf(outlen_str, "%d", &outlen); - if (err < 0) - return err; + if (err != 1) + return -EINVAL; ptr = kzalloc(outlen, GFP_KERNEL); if (!ptr) From 52c795af04441d76f565c4634f893e5b553df2ae Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 14 Nov 2022 20:04:29 +0200 Subject: [PATCH 688/963] net/mlx5e: Fix use-after-free when reverting termination table When having multiple dests with termination tables and second one or afterwards fails the driver reverts usage of term tables but doesn't reset the assignment in attr->dests[num_vport_dests].termtbl which case a use-after-free when releasing the rule. Fix by resetting the assignment of termtbl to null. Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 108a3503f413..edd910258314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -312,6 +312,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + attr->dests[curr_dest].termtbl = NULL; + /* search for the destination associated with the * current term table */ From bc59c7d326d28a12f61cd068e459235a7a0bd009 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Oct 2022 17:37:29 +0300 Subject: [PATCH 689/963] net/mlx5e: Fix a couple error codes If kvzalloc() fails then return -ENOMEM. Don't return success. Fixes: 3b20949cb21b ("net/mlx5e: Add MACsec RX steering rules") Fixes: e467b283ffd5 ("net/mlx5e: Add MACsec TX steering rules") Signed-off-by: Dan Carpenter Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 1ac0cf04e811..96cec6d826c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -250,7 +250,7 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_MACSEC); if (!ns) @@ -261,8 +261,10 @@ static int macsec_fs_tx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto out_spec; + } tx_tables = &tx_fs->tables; ft_crypto = &tx_tables->ft_crypto; @@ -898,7 +900,7 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; u32 *flow_group_in; - int err = 0; + int err; ns = mlx5_get_flow_namespace(macsec_fs->mdev, MLX5_FLOW_NAMESPACE_KERNEL_RX_MACSEC); if (!ns) @@ -909,8 +911,10 @@ static int macsec_fs_rx_create(struct mlx5e_macsec_fs *macsec_fs) return -ENOMEM; flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!flow_group_in) + if (!flow_group_in) { + err = -ENOMEM; goto free_spec; + } rx_tables = &rx_fs->tables; ft_crypto = &rx_tables->ft_crypto; From 406e6db7fcaf2a47f6ac8bef47057305c6bc8b0e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 8 Nov 2022 22:06:14 +0800 Subject: [PATCH 690/963] net/mlx5e: Use kvfree() in mlx5e_accel_fs_tcp_create() 'accel_tcp' is allocated by kvzalloc(), which should freed by kvfree(). Fixes: f52f2faee581 ("net/mlx5e: Introduce flow steering API") Signed-off-by: YueHaibing Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 285d32d2fd08..d7c020f72401 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kfree(accel_tcp); + kvfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } From 813115c4669d74cdf6fc253c5a1768d1202073c3 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Thu, 27 Oct 2022 15:47:17 +0300 Subject: [PATCH 691/963] net/mlx5e: MACsec, fix RX data path 16 RX security channel limit Currently the data path metadata flow id mask wrongly limits the number of different RX security channels (SC) to 16, whereas in adding RX SC the limit is "2^16 - 1" this cause an overlap in metadata flow id once more than 16 RX SCs is added, this corrupts MACsec RX offloaded flow handling. Fix by using the correct mask, while at it improve code to use this mask when adding the Rx rule and improve visibility of such errors by adding debug massage. Fixes: b7c9400cbc48 ("net/mlx5e: Implement MACsec Rx data path using MACsec skb_metadata_dst") Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 11 ++++++++--- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.h | 6 ++++-- .../ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 4 ++-- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 3dc6c987b8da..96fa553ef93a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -736,9 +736,14 @@ static int mlx5e_macsec_add_rxsc(struct macsec_context *ctx) sc_xarray_element->rx_sc = rx_sc; err = xa_alloc(&macsec->sc_xarray, &sc_xarray_element->fs_id, sc_xarray_element, - XA_LIMIT(1, USHRT_MAX), GFP_KERNEL); - if (err) + XA_LIMIT(1, MLX5_MACEC_RX_FS_ID_MAX), GFP_KERNEL); + if (err) { + if (err == -EBUSY) + netdev_err(ctx->netdev, + "MACsec offload: unable to create entry for RX SC (%d Rx SCs already allocated)\n", + MLX5_MACEC_RX_FS_ID_MAX); goto destroy_sc_xarray_elemenet; + } rx_sc->md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!rx_sc->md_dst) { @@ -1748,7 +1753,7 @@ void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, if (!macsec) return; - fs_id = MLX5_MACSEC_METADATA_HANDLE(macsec_meta_data); + fs_id = MLX5_MACSEC_RX_METADAT_HANDLE(macsec_meta_data); rcu_read_lock(); sc_xarray_element = xa_load(&macsec->sc_xarray, fs_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h index d580b4a91253..347380a2cd9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.h @@ -10,9 +10,11 @@ #include #include -/* Bit31 - 30: MACsec marker, Bit3-0: MACsec id */ +/* Bit31 - 30: MACsec marker, Bit15-0: MACsec id */ +#define MLX5_MACEC_RX_FS_ID_MAX USHRT_MAX /* Must be power of two */ +#define MLX5_MACSEC_RX_FS_ID_MASK MLX5_MACEC_RX_FS_ID_MAX #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) -#define MLX5_MACSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(3, 0)) +#define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) struct mlx5e_priv; struct mlx5e_macsec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index 96cec6d826c2..f87a1c4021bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -1146,10 +1146,10 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, ft_crypto = &rx_tables->ft_crypto; /* Set bit[31 - 30] macsec marker - 0x01 */ - /* Set bit[3-0] fs id */ + /* Set bit[15-0] fs id */ MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); - MLX5_SET(set_action_in, action, data, fs_id | BIT(30)); + MLX5_SET(set_action_in, action, data, MLX5_MACSEC_RX_METADAT_HANDLE(fs_id) | BIT(30)); MLX5_SET(set_action_in, action, offset, 0); MLX5_SET(set_action_in, action, length, 32); From 9b9e23c4dc2b632ece44c68ce6aebc0bf841d6a2 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Sun, 30 Oct 2022 17:16:58 +0200 Subject: [PATCH 692/963] net/mlx5e: MACsec, fix memory leak when MACsec device is deleted When the MACsec netdevice is deleted, all related Rx/Tx HW/SW states should be released/deallocated, however currently part of the Rx security channel association data is not cleaned properly, hence the memory leaks. Fix by make sure all related Rx Sc resources are cleaned/freed, while at it improve code by grouping release SC context in a function so it can be used in both delete MACsec device and delete Rx SC operations. Fixes: 5a39816a75e5 ("net/mlx5e: Add MACsec offload SecY support") Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/macsec.c | 77 ++++++++----------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 96fa553ef93a..b51de07d5bad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -823,16 +823,43 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) return err; } +static void macsec_del_rxsc_ctx(struct mlx5e_macsec *macsec, struct mlx5e_macsec_rx_sc *rx_sc) +{ + struct mlx5e_macsec_sa *rx_sa; + int i; + + for (i = 0; i < MACSEC_NUM_AN; ++i) { + rx_sa = rx_sc->rx_sa[i]; + if (!rx_sa) + continue; + + mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); + mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); + + kfree(rx_sa); + rx_sc->rx_sa[i] = NULL; + } + + /* At this point the relevant MACsec offload Rx rule already removed at + * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current + * Rx related data propagating using xa_erase which uses rcu to sync, + * once fs_id is erased then this rx_sc is hidden from datapath. + */ + list_del_rcu(&rx_sc->rx_sc_list_element); + xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); + metadata_dst_free(rx_sc->md_dst); + kfree(rx_sc->sc_xarray_element); + kfree_rcu(rx_sc); +} + static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) { struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec *macsec; struct list_head *list; int err = 0; - int i; mutex_lock(&priv->macsec->lock); @@ -854,31 +881,7 @@ static int mlx5e_macsec_del_rxsc(struct macsec_context *ctx) goto out; } - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - -/* - * At this point the relevant MACsec offload Rx rule already removed at - * mlx5e_macsec_cleanup_sa need to wait for datapath to finish current - * Rx related data propagating using xa_erase which uses rcu to sync, - * once fs_id is erased then this rx_sc is hidden from datapath. - */ - list_del_rcu(&rx_sc->rx_sc_list_element); - xa_erase(&macsec->sc_xarray, rx_sc->sc_xarray_element->fs_id); - metadata_dst_free(rx_sc->md_dst); - kfree(rx_sc->sc_xarray_element); - - kfree_rcu(rx_sc); - + macsec_del_rxsc_ctx(macsec, rx_sc); out: mutex_unlock(&macsec->lock); @@ -1239,7 +1242,6 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) struct mlx5e_priv *priv = netdev_priv(ctx->netdev); struct mlx5e_macsec_device *macsec_device; struct mlx5e_macsec_rx_sc *rx_sc, *tmp; - struct mlx5e_macsec_sa *rx_sa; struct mlx5e_macsec_sa *tx_sa; struct mlx5e_macsec *macsec; struct list_head *list; @@ -1268,28 +1270,15 @@ static int mlx5e_macsec_del_secy(struct macsec_context *ctx) } list = &macsec_device->macsec_rx_sc_list_head; - list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) { - for (i = 0; i < MACSEC_NUM_AN; ++i) { - rx_sa = rx_sc->rx_sa[i]; - if (!rx_sa) - continue; - - mlx5e_macsec_cleanup_sa(macsec, rx_sa, false); - mlx5_destroy_encryption_key(macsec->mdev, rx_sa->enc_key_id); - kfree(rx_sa); - rx_sc->rx_sa[i] = NULL; - } - - list_del_rcu(&rx_sc->rx_sc_list_element); - - kfree_rcu(rx_sc); - } + list_for_each_entry_safe(rx_sc, tmp, list, rx_sc_list_element) + macsec_del_rxsc_ctx(macsec, rx_sc); kfree(macsec_device->dev_addr); macsec_device->dev_addr = NULL; list_del_rcu(&macsec_device->macsec_device_list_element); --macsec->num_of_devices; + kfree(macsec_device); out: mutex_unlock(&macsec->lock); From eead5ea2fce4196139f399a5727602c3747e1370 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 31 Oct 2022 08:22:04 +0200 Subject: [PATCH 693/963] net/mlx5e: MACsec, fix update Rx secure channel active field The main functionality for this operation is to update the active state of the Rx security channel (SC) if the new active setting is different from the current active state of this Rx SC, however the relevant active state check is done post updating the current active state to match the new active state, effectively blocks any offload state update for the Rx SC in question. Fix by delay the assignment to be post the relevant check. Fixes: aae3454e4d4c ("net/mlx5e: Add MACsec offload Rx command support") Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index b51de07d5bad..9c891a877998 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -803,10 +803,10 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) goto out; } - rx_sc->active = ctx_rx_sc->active; if (rx_sc->active == ctx_rx_sc->active) goto out; + rx_sc->active = ctx_rx_sc->active; for (i = 0; i < MACSEC_NUM_AN; ++i) { rx_sa = rx_sc->rx_sa[i]; if (!rx_sa) From ceb51b273ec0b1a32de8a167c28a602bb3d290a1 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Mon, 31 Oct 2022 09:24:06 +0200 Subject: [PATCH 694/963] net/mlx5e: MACsec, fix mlx5e_macsec_update_rxsa bail condition and functionality Fix update Rx SA wrong bail condition, naturally update functionality needs to check that something changed otherwise bailout currently the active state check does just the opposite, furthermore unlike deactivate path which remove the macsec rules to deactivate the offload, the activation path does not include the counter part installation of the macsec rules. Fix by using correct bailout condition and when Rx SA changes state to active then add the relevant macsec rules. While at it, refine function name to reflect more precisely its role. Fixes: aae3454e4d4c ("net/mlx5e: Add MACsec offload Rx command support") Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/macsec.c | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 9c891a877998..c19581f1f733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -427,15 +427,15 @@ mlx5e_macsec_get_rx_sc_from_sc_list(const struct list_head *list, sci_t sci) return NULL; } -static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, - struct mlx5e_macsec_sa *rx_sa, - bool active) +static int macsec_rx_sa_active_update(struct macsec_context *ctx, + struct mlx5e_macsec_sa *rx_sa, + bool active) { - struct mlx5_core_dev *mdev = macsec->mdev; - struct mlx5_macsec_obj_attrs attrs = {}; + struct mlx5e_priv *priv = netdev_priv(ctx->netdev); + struct mlx5e_macsec *macsec = priv->macsec; int err = 0; - if (rx_sa->active != active) + if (rx_sa->active == active) return 0; rx_sa->active = active; @@ -444,13 +444,11 @@ static int mlx5e_macsec_update_rx_sa(struct mlx5e_macsec *macsec, return 0; } - attrs.sci = cpu_to_be64((__force u64)rx_sa->sci); - attrs.enc_key_id = rx_sa->enc_key_id; - err = mlx5e_macsec_create_object(mdev, &attrs, false, &rx_sa->macsec_obj_id); + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); if (err) - return err; + rx_sa->active = false; - return 0; + return err; } static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) @@ -812,7 +810,7 @@ static int mlx5e_macsec_upd_rxsc(struct macsec_context *ctx) if (!rx_sa) continue; - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, rx_sa->active && ctx_rx_sc->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, rx_sa->active && ctx_rx_sc->active); if (err) goto out; } @@ -1023,7 +1021,7 @@ static int mlx5e_macsec_upd_rxsa(struct macsec_context *ctx) goto out; } - err = mlx5e_macsec_update_rx_sa(macsec, rx_sa, ctx_rx_sa->active); + err = macsec_rx_sa_active_update(ctx, rx_sa, ctx_rx_sa->active); out: mutex_unlock(&macsec->lock); From 194cc051e29ea1641d424126b8f28f9d31d5c655 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Tue, 8 Nov 2022 16:19:29 +0200 Subject: [PATCH 695/963] net/mlx5e: MACsec, fix add Rx security association (SA) rule memory leak Currently MACsec's add Rx SA flow steering (fs) rule routine uses a spec object which is dynamically allocated and do not free it upon leaving. The above led to a memory leak. Fix by freeing dynamically allocated objects. Fixes: 3b20949cb21b ("net/mlx5e: Add MACsec RX steering rules") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c index f87a1c4021bc..5b658a5588c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c @@ -1209,6 +1209,7 @@ macsec_fs_rx_add_rule(struct mlx5e_macsec_fs *macsec_fs, rx_rule->rule[1] = rule; } + kvfree(spec); return macsec_rule; err: From c0071be0e16c461680d87b763ba1ee5e46548fde Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Mon, 31 Oct 2022 11:07:59 +0200 Subject: [PATCH 696/963] net/mlx5e: MACsec, remove replay window size limitation in offload path Currently offload path limits replay window size to 32/64/128/256 bits, such a limitation should not exist since software allows it. Remove such limitation. Fixes: eb43846b43c3 ("net/mlx5e: Support MACsec offload replay window") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/macsec.c | 16 ---------------- include/linux/mlx5/mlx5_ifc.h | 7 ------- 2 files changed, 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index c19581f1f733..72f8be65fa90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -229,22 +229,6 @@ static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, voi if (!attrs->replay_protect) return 0; - switch (attrs->replay_window) { - case 256: - window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT; - break; - case 128: - window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT; - break; - case 64: - window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT; - break; - case 32: - window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT; - break; - default: - return -EINVAL; - } MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..981fc7dfa408 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11611,13 +11611,6 @@ enum { MLX5_MACSEC_ASO_REPLAY_PROTECTION = 0x1, }; -enum { - MLX5_MACSEC_ASO_REPLAY_WIN_32BIT = 0x0, - MLX5_MACSEC_ASO_REPLAY_WIN_64BIT = 0x1, - MLX5_MACSEC_ASO_REPLAY_WIN_128BIT = 0x2, - MLX5_MACSEC_ASO_REPLAY_WIN_256BIT = 0x3, -}; - #define MLX5_MACSEC_ASO_INC_SN 0x2 #define MLX5_MACSEC_ASO_REG_C_4_5 0x2 From 7c5578e29963c35bb14d08b15df77a0c11f71fc5 Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Tue, 8 Nov 2022 14:30:33 +0200 Subject: [PATCH 697/963] net/mlx5e: MACsec, fix Tx SA active field update Currently during update Tx security association (SA) flow, the Tx SA active state is updated only if the Tx SA in question is the same SA that the MACsec interface is using for Tx,in consequence when the MACsec interface chose to work with this Tx SA later, where this SA for example should have been updated to active state and it was not, the relevant Tx SA HW context won't be installed, hence the MACSec flow won't be offloaded. Fix by update Tx SA active state as part of update flow regardless whether the SA in question is the same Tx SA used by the MACsec interface. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Raed Salem Reviewed-by: Emeel Hakim Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 72f8be65fa90..137b34347de1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -602,6 +602,7 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) if (tx_sa->active == ctx_tx_sa->active) goto out; + tx_sa->active = ctx_tx_sa->active; if (tx_sa->assoc_num != tx_sc->encoding_sa) goto out; @@ -617,8 +618,6 @@ static int mlx5e_macsec_upd_txsa(struct macsec_context *ctx) mlx5e_macsec_cleanup_sa(macsec, tx_sa, true); } - - tx_sa->active = ctx_tx_sa->active; out: mutex_unlock(&macsec->lock); From 9034b29251818ab7b4b38bf6ca860cee45d2726a Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 9 Nov 2022 15:14:28 +0200 Subject: [PATCH 698/963] net/mlx5e: MACsec, block offload requests with encrypt off Currently offloading MACsec with authentication only (encrypt property set to off) is not supported, block such requests when adding/updating a macsec device. Fixes: 8ff0ac5be144 ("net/mlx5: Add MACsec offload Tx command support") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 137b34347de1..0d6dc394a12a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -458,6 +458,11 @@ static bool mlx5e_macsec_secy_features_validate(struct macsec_context *ctx) return false; } + if (!ctx->secy->tx_sc.encrypt) { + netdev_err(netdev, "MACsec offload: encrypt off isn't supported\n"); + return false; + } + return true; } From b0686565946368892c2cdf92f102392e24823588 Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Tue, 22 Nov 2022 23:00:46 +0800 Subject: [PATCH 699/963] virtio_net: Fix probe failed when modprobe virtio_net When doing the following test steps, an error was found: step 1: modprobe virtio_net succeeded # modprobe virtio_net <-- OK step 2: fault injection in register_netdevice() # modprobe -r virtio_net <-- OK # ... FAULT_INJECTION: forcing a failure. name failslab, interval 1, probability 0, space 0, times 0 CPU: 0 PID: 3521 Comm: modprobe Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), Call Trace: ... should_failslab+0xa/0x20 ... dev_set_name+0xc0/0x100 netdev_register_kobject+0xc2/0x340 register_netdevice+0xbb9/0x1320 virtnet_probe+0x1d72/0x2658 [virtio_net] ... virtio_net: probe of virtio0 failed with error -22 step 3: modprobe virtio_net failed # modprobe virtio_net <-- failed virtio_net: probe of virtio0 failed with error -2 The root cause of the problem is that the queues are not disable on the error handling path when register_netdevice() fails in virtnet_probe(), resulting in an error "-ENOENT" returned in the next modprobe call in setup_vq(). virtio_pci_modern_device uses virtqueues to send or receive message, and "queue_enable" records whether the queues are available. In vp_modern_find_vqs(), all queues will be selected and activated, but once queues are enabled there is no way to go back except reset. Fix it by reset virtio device on error handling path. This makes error handling follow the same order as normal device cleanup in virtnet_remove() which does: unregister, destroy failover, then reset. And that flow is better tested than error handling so we can be reasonably sure it works well. Fixes: 024655555021 ("virtio_net: fix use after free on allocation failure") Signed-off-by: Li Zetao Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20221122150046.3910638-1-lizetao1@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7106932c6f88..86e52454b5b5 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3949,12 +3949,11 @@ static int virtnet_probe(struct virtio_device *vdev) return 0; free_unregister_netdev: - virtio_reset_device(vdev); - unregister_netdev(dev); free_failover: net_failover_destroy(vi->failover); free_vqs: + virtio_reset_device(vdev); cancel_delayed_work_sync(&vi->refill); free_receive_page_frags(vi); virtnet_del_vqs(vi); From 6aae1bcb41c7aefd28bf5d90e36ebdd151c2d8ba Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Wed, 23 Nov 2022 09:16:17 +0800 Subject: [PATCH 700/963] net: altera_tse: release phylink resources in tse_shutdown() Call phylink_disconnect_phy() in tse_shutdown() to release the resources occupied by phylink_of_phy_connect() in the tse_open(). Fixes: fef2998203e1 ("net: altera: tse: convert to phylink") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20221123011617.332302-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/altera/altera_tse_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 7633b227b2ca..711d5b5a4c49 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -990,6 +990,7 @@ static int tse_shutdown(struct net_device *dev) int ret; phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); netif_stop_queue(dev); napi_disable(&priv->napi); From ad17c2a3f11b0f6b122e7842d8f7d9a5fcc7ac63 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Wed, 23 Nov 2022 14:59:19 +0800 Subject: [PATCH 701/963] octeontx2-af: Fix reference count issue in rvu_sdp_init() pci_get_device() will decrease the reference count for the *from* parameter. So we don't need to call put_device() to decrease the reference. Let's remove the put_device() in the loop and only decrease the reference count of the returned 'pdev' for the last loop because it will not be passed to pci_get_device() as input parameter. We don't need to check if 'pdev' is NULL because it is already checked inside pci_dev_put(). Also add pci_dev_put() for the error path. Fixes: fe1939bb2340 ("octeontx2-af: Add SDP interface support") Signed-off-by: Xiongfeng Wang Reviewed-by: Saeed Mahameed Link: https://lore.kernel.org/r/20221123065919.31499-1-wangxiongfeng2@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c index b04fb226f708..ae50d56258ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_sdp.c @@ -62,15 +62,18 @@ int rvu_sdp_init(struct rvu *rvu) pfvf->sdp_info = devm_kzalloc(rvu->dev, sizeof(struct sdp_node_info), GFP_KERNEL); - if (!pfvf->sdp_info) + if (!pfvf->sdp_info) { + pci_dev_put(pdev); return -ENOMEM; + } dev_info(rvu->dev, "SDP PF number:%d\n", sdp_pf_num[i]); - put_device(&pdev->dev); i++; } + pci_dev_put(pdev); + return 0; } From af169b7759a9b9369b5106cd07a25c57ce60119e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 21 Nov 2022 15:57:44 +0100 Subject: [PATCH 702/963] perf: Fixup SIGTRAP and sample_flags interaction The perf_event_attr::sigtrap functionality relies on data->addr being set. However commit 7b0846301531 ("perf: Use sample_flags for addr") changed this to only initialize data->addr when not 0. Fixes: 7b0846301531 ("perf: Use sample_flags for addr") Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Y3426b4OimE%2FI5po%40hirez.programming.kicks-ass.net --- kernel/events/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 884871427a94..f2bb27e5c316 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9328,7 +9328,10 @@ static int __perf_event_overflow(struct perf_event *event, */ WARN_ON_ONCE(event->pending_sigtrap != pending_id); } - event->pending_addr = data->addr; + + event->pending_addr = 0; + if (data->sample_flags & PERF_SAMPLE_ADDR) + event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } From 030a976efae83f7b6593afb11a8254d42f9290fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 19 Nov 2022 10:45:54 +0800 Subject: [PATCH 703/963] perf: Consider OS filter fail Some PMUs (notably the traditional hardware kind) have boundary issues with the OS filter. Specifically, it is possible for perf_event_attr::exclude_kernel=1 events to trigger in-kernel due to SKID or errata. This can upset the sigtrap logic some and trigger the WARN. However, if this invalid sample is the first we must not loose the SIGTRAP, OTOH if it is the second, it must not override the pending_addr with a (possibly) invalid one. Fixes: ca6c21327c6a ("perf: Fix missing SIGTRAPs") Reported-by: Pengfei Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Marco Elver Tested-by: Pengfei Xu Link: https://lkml.kernel.org/r/Y3hDYiXwRnJr8RYG@xpf.sh.intel.com --- kernel/events/core.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f2bb27e5c316..9d15d2d96119 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9273,6 +9273,19 @@ int perf_event_account_interrupt(struct perf_event *event) return __perf_event_account_interrupt(event, 1); } +static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs) +{ + /* + * Due to interrupt latency (AKA "skid"), we may enter the + * kernel before taking an overflow, even if the PMU is only + * counting user events. + */ + if (event->attr.exclude_kernel && !user_mode(regs)) + return false; + + return true; +} + /* * Generic event overflow handling, sampling. */ @@ -9306,6 +9319,13 @@ static int __perf_event_overflow(struct perf_event *event, } if (event->attr.sigtrap) { + /* + * The desired behaviour of sigtrap vs invalid samples is a bit + * tricky; on the one hand, one should not loose the SIGTRAP if + * it is the first event, on the other hand, we should also not + * trigger the WARN or override the data address. + */ + bool valid_sample = sample_is_allowed(event, regs); unsigned int pending_id = 1; if (regs) @@ -9313,7 +9333,7 @@ static int __perf_event_overflow(struct perf_event *event, if (!event->pending_sigtrap) { event->pending_sigtrap = pending_id; local_inc(&event->ctx->nr_pending); - } else if (event->attr.exclude_kernel) { + } else if (event->attr.exclude_kernel && valid_sample) { /* * Should not be able to return to user space without * consuming pending_sigtrap; with exceptions: @@ -9330,7 +9350,7 @@ static int __perf_event_overflow(struct perf_event *event, } event->pending_addr = 0; - if (data->sample_flags & PERF_SAMPLE_ADDR) + if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR)) event->pending_addr = data->addr; irq_work_queue(&event->pending_irq); } From 661e5ebbafd26d9d2e3c749f5cf591e55c7364f5 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Wed, 23 Nov 2022 16:22:36 +0800 Subject: [PATCH 704/963] net: thunderx: Fix the ACPI memory leak The ACPI buffer memory (string.pointer) should be freed as the buffer is not used after returning from bgx_acpi_match_id(), free it to prevent memory leak. Fixes: 46b903a01c05 ("net, thunder, bgx: Add support to get MAC address from ACPI.") Signed-off-by: Yu Liao Link: https://lore.kernel.org/r/20221123082237.1220521-1-liaoyu15@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 2f6484dc186a..7eb2ddbe9bad 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1436,8 +1436,10 @@ static acpi_status bgx_acpi_match_id(acpi_handle handle, u32 lvl, return AE_OK; } - if (strncmp(string.pointer, bgx_sel, 4)) + if (strncmp(string.pointer, bgx_sel, 4)) { + kfree(string.pointer); return AE_OK; + } acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1, bgx_acpi_register_phy, NULL, bgx, NULL); From 6a3fc8c330d1c1fa3d8773d7d38a7c55c4900dfe Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Sat, 12 Nov 2022 16:40:59 +0100 Subject: [PATCH 705/963] ARM: at91: fix build for SAMA5D3 w/o L2 cache The L2 cache is present on the newer SAMA5D2 and SAMA5D4 families, but apparently not for the older SAMA5D3. Solves a build-time regression with the following symptom: sama5.c:(.init.text+0x48): undefined reference to `outer_cache' Fixes: 3b5a7ca7d252 ("ARM: at91: setup outer cache .write_sec() callback if needed") Signed-off-by: Peter Rosin [claudiu.beznea: delete "At least not always." from commit description] Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/b7f8dacc-5e1f-0eb2-188e-3ad9a9f7613d@axentia.se --- arch/arm/mach-at91/sama5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c index 67ed68fbe3a5..bf2b5c6a18c6 100644 --- a/arch/arm/mach-at91/sama5.c +++ b/arch/arm/mach-at91/sama5.c @@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg) static void __init sama5_secure_cache_init(void) { sam_secure_init(); - if (sam_linux_is_optee_available()) + if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available()) outer_cache.write_sec = sama5_l2c310_write_sec; } From 89d21e259a94f7d5582ec675aa445f5a79f347e4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 24 Nov 2022 09:37:27 +0100 Subject: [PATCH 706/963] powerpc/bpf/32: Fix Oops on tail call tests test_bpf tail call tests end up as: test_bpf: #0 Tail call leaf jited:1 85 PASS test_bpf: #1 Tail call 2 jited:1 111 PASS test_bpf: #2 Tail call 3 jited:1 145 PASS test_bpf: #3 Tail call 4 jited:1 170 PASS test_bpf: #4 Tail call load/store leaf jited:1 190 PASS test_bpf: #5 Tail call load/store jited:1 BUG: Unable to handle kernel data access on write at 0xf1b4e000 Faulting instruction address: 0xbe86b710 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K MMU=Hash PowerMac Modules linked in: test_bpf(+) CPU: 0 PID: 97 Comm: insmod Not tainted 6.1.0-rc4+ #195 Hardware name: PowerMac3,1 750CL 0x87210 PowerMac NIP: be86b710 LR: be857e88 CTR: be86b704 REGS: f1b4df20 TRAP: 0300 Not tainted (6.1.0-rc4+) MSR: 00009032 CR: 28008242 XER: 00000000 DAR: f1b4e000 DSISR: 42000000 GPR00: 00000001 f1b4dfe0 c11d2280 00000000 00000000 00000000 00000002 00000000 GPR08: f1b4e000 be86b704 f1b4e000 00000000 00000000 100d816a f2440000 fe73baa8 GPR16: f2458000 00000000 c1941ae4 f1fe2248 00000045 c0de0000 f2458030 00000000 GPR24: 000003e8 0000000f f2458000 f1b4dc90 3e584b46 00000000 f24466a0 c1941a00 NIP [be86b710] 0xbe86b710 LR [be857e88] __run_one+0xec/0x264 [test_bpf] Call Trace: [f1b4dfe0] [00000002] 0x2 (unreliable) Instruction dump: XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX ---[ end trace 0000000000000000 ]--- This is a tentative to write above the stack. The problem is encoutered with tests added by commit 38608ee7b690 ("bpf, tests: Add load store test case for tail call") This happens because tail call is done to a BPF prog with a different stack_depth. At the time being, the stack is kept as is when the caller tail calls its callee. But at exit, the callee restores the stack based on its own properties. Therefore here, at each run, r1 is erroneously increased by 32 - 16 = 16 bytes. This was done that way in order to pass the tail call count from caller to callee through the stack. As powerpc32 doesn't have a red zone in the stack, it was necessary the maintain the stack as is for the tail call. But it was not anticipated that the BPF frame size could be different. Let's take a new approach. Use register r4 to carry the tail call count during the tail call, and save it into the stack at function entry if required. This means the input parameter must be in r3, which is more correct as it is a 32 bits parameter, then tail call better match with normal BPF function entry, the down side being that we move that input parameter back and forth between r3 and r4. That can be optimised later. Doing that also has the advantage of maximising the common parts between tail calls and a normal function exit. With the fix, tail call tests are now successfull: test_bpf: #0 Tail call leaf jited:1 53 PASS test_bpf: #1 Tail call 2 jited:1 115 PASS test_bpf: #2 Tail call 3 jited:1 154 PASS test_bpf: #3 Tail call 4 jited:1 165 PASS test_bpf: #4 Tail call load/store leaf jited:1 101 PASS test_bpf: #5 Tail call load/store jited:1 141 PASS test_bpf: #6 Tail call error path, max count reached jited:1 994 PASS test_bpf: #7 Tail call count preserved across function calls jited:1 140975 PASS test_bpf: #8 Tail call error path, NULL target jited:1 110 PASS test_bpf: #9 Tail call error path, index out of range jited:1 69 PASS test_bpf: test_tail_calls: Summary: 10 PASSED, 0 FAILED, [10/10 JIT'ed] Suggested-by: Naveen N. Rao Fixes: 51c66ad849a7 ("powerpc/bpf: Implement extended BPF on PPC32") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Tested-by: Naveen N. Rao Link: https://lore.kernel.org/r/757acccb7fbfc78efa42dcf3c974b46678198905.1669278887.git.christophe.leroy@csgroup.eu --- arch/powerpc/net/bpf_jit_comp32.c | 56 +++++++++++++------------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43f1c76d48ce..a379b0ce19ff 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ + EMIT(PPC_RAW_LI(_R4, 0)); + +#define BPF_TAILCALL_PROLOGUE_SIZE 4 + + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); + + if (ctx->seen & SEEN_TAILCALL) + EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); + /* First arg comes in as a 32 bits pointer. */ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); - EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); - - /* - * Initialize tail_call_cnt in stack frame if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. - */ - if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, - bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - else - EMIT(PPC_RAW_NOP()); - -#define BPF_TAILCALL_PROLOGUE_SIZE 16 /* * We need a stack frame, but we don't necessarily need to @@ -170,6 +166,16 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + + /* Tear down our stack frame */ + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); + + if (ctx->seen & SEEN_FUNC) + EMIT(PPC_RAW_MTLR(_R0)); + } void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) @@ -178,16 +184,6 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); - /* Tear down our stack frame */ - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - - EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_BLR()); } @@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) @@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(_R3)); EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); + /* Put tail_call_cnt in r4 */ + EMIT(PPC_RAW_MR(_R4, _R0)); + /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); From 75c1d995db57229e5c7b1c830e4a2a14bbf94adb Mon Sep 17 00:00:00 2001 From: Ulises Mendez Martinez Date: Thu, 24 Nov 2022 12:00:01 +0000 Subject: [PATCH 707/963] ANDROID: kleaf: Rename allmodconfig targets * This is to follow the convention: kernel__ Bug: 260313194 Change-Id: I643fef0ed77b24c6564f8b8e1ed152180beeaf80 Signed-off-by: Ulises Mendez Martinez --- BUILD.bazel | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 68b160637797..5c9b101a7224 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -11,17 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir") +load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c") +load("//build/kernel/kleaf:kernel.bzl", "ddk_headers", "kernel_build", "kernel_images", "kernel_modules_install") +load(":modules.bzl", "COMMON_GKI_MODULES_LIST") + package( default_visibility = [ "//visibility:public", ], ) -load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir") -load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c") -load("//build/kernel/kleaf:kernel.bzl", "ddk_headers", "kernel_build", "kernel_images", "kernel_modules_install") -load(":modules.bzl", "COMMON_GKI_MODULES_LIST") - define_common_kernels(target_configs = { "kernel_aarch64": { "module_implicit_outs": COMMON_GKI_MODULES_LIST, @@ -330,9 +330,9 @@ copy_to_dist_dir( # - --allow-undeclared-modules must be used so modules are not declared or copied. # - No dist target because these are build tests. We don't care about the artifacts. -# tools/bazel build --allow_undeclared_modules //common:allmodconfig_aarch64 +# tools/bazel build --allow_undeclared_modules //common:kernel_aarch64_allmodconfig kernel_build( - name = "allmodconfig_aarch64", + name = "kernel_aarch64_allmodconfig", # Hack to actually check the build. # Otherwise, Bazel thinks that there are no output files, and skip building. outs = [".config"], @@ -340,9 +340,9 @@ kernel_build( visibility = ["//visibility:private"], ) -# tools/bazel build --allow_undeclared_modules //common:allmodconfig_x86_64 +# tools/bazel build --allow_undeclared_modules //common:kernel_x86_64_allmodconfig kernel_build( - name = "allmodconfig_x86_64", + name = "kernel_x86_64_allmodconfig", # Hack to actually check the build. # Otherwise, Bazel thinks that there are no output files, and skip building. outs = [".config"], @@ -350,9 +350,9 @@ kernel_build( visibility = ["//visibility:private"], ) -# tools/bazel build --allow_undeclared_modules //common:allmodconfig_arm +# tools/bazel build --allow_undeclared_modules //common:kernel_arm_allmodconfig kernel_build( - name = "allmodconfig_arm", + name = "kernel_arm_allmodconfig", # Hack to actually check the build. # Otherwise, Bazel thinks that there are no output files, and skip building. outs = [".config"], From f44e07a8afdd713ddc1a8832c39372fe5dd86895 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Nov 2022 13:05:39 +0100 Subject: [PATCH 708/963] s390/crashdump: fix TOD programmable field size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of the TOD programmable field was incorrectly increased from four to eight bytes with commit 1a2c5840acf9 ("s390/dump: cleanup CPU save area handling"). This leads to an elf notes section NT_S390_TODPREG which has a size of eight instead of four bytes in case of kdump, however even worse is that the contents is incorrect: it is supposed to contain only the contents of the TOD programmable field, but in fact contains a mix of the TOD programmable field (32 bit upper bits) and parts of the CPU timer register (lower 32 bits). Fix this by simply changing the size of the todpreg field within the save area structure. This will implicitly also fix the size of the corresponding elf notes sections. This also gets rid of this compile time warning: in function ‘fortify_memcpy_chk’, inlined from ‘save_area_add_regs’ at arch/s390/kernel/crash_dump.c:99:2: ./include/linux/fortify-string.h:413:25: error: call to ‘__read_overflow2_field’ declared with attribute warning: detected read beyond size of field (2nd parameter); maybe use struct_group()? [-Werror=attribute-warning] 413 | __read_overflow2_field(q_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 1a2c5840acf9 ("s390/dump: cleanup CPU save area handling") Reviewed-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/crash_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index dd74fe664ed1..e4ef67e4da0a 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,7 +46,7 @@ struct save_area { u64 fprs[16]; u32 fpc; u32 prefix; - u64 todpreg; + u32 todpreg; u64 timer; u64 todcmp; u64 vxrs_low[16]; From adba1a9b81d5020a9bf8332fee9ff0171fe7623d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 17 Nov 2022 20:23:30 +0100 Subject: [PATCH 709/963] MAINTAINERS: add S390 MM section Alexander Gordeev and Gerald Schaefer are covering the whole s390 specific memory management code. Reflect that by adding a new S390 MM section to MAINTAINERS. Also rename the S390 section to S390 ARCHITECTURE to be a bit more precise. Acked-by: Gerald Schaefer Acked-by: Christian Borntraeger Acked-by: Alexander Gordeev Acked-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- MAINTAINERS | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea5fcf9047ea..c4f6d3490d13 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17987,7 +17987,7 @@ L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/fbdev/savage/ -S390 +S390 ARCHITECTURE M: Heiko Carstens M: Vasily Gorbik M: Alexander Gordeev @@ -18042,6 +18042,15 @@ L: netdev@vger.kernel.org S: Supported F: drivers/s390/net/ +S390 MM +M: Alexander Gordeev +M: Gerald Schaefer +L: linux-s390@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git +F: arch/s390/include/asm/pgtable.h +F: arch/s390/mm + S390 PCI SUBSYSTEM M: Niklas Schnelle M: Gerald Schaefer From 0dd4cdccdab3d74bd86b868768a7dca216bcce7e Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 23 Nov 2022 10:08:33 +0100 Subject: [PATCH 710/963] KVM: s390: vsie: Fix the initialization of the epoch extension (epdx) field We recently experienced some weird huge time jumps in nested guests when rebooting them in certain cases. After adding some debug code to the epoch handling in vsie.c (thanks to David Hildenbrand for the idea!), it was obvious that the "epdx" field (the multi-epoch extension) did not get set to 0xff in case the "epoch" field was negative. Seems like the code misses to copy the value from the epdx field from the guest to the shadow control block. By doing so, the weird time jumps are gone in our scenarios. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2140899 Fixes: 8fa1696ea781 ("KVM: s390: Multiple Epoch Facility support") Signed-off-by: Thomas Huth Reviewed-by: Christian Borntraeger Acked-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Cc: stable@vger.kernel.org # 4.19+ Link: https://lore.kernel.org/r/20221123090833.292938-1-thuth@redhat.com Message-Id: <20221123090833.292938-1-thuth@redhat.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/vsie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..ace2541ababd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) From 8fa452cfafed521aaf5a18c71003fe24b1ee6141 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Thu, 10 Nov 2022 14:14:37 +0800 Subject: [PATCH 711/963] can: can327: can327_feed_frame_to_netdev(): fix potential skb leak when netdev is down In can327_feed_frame_to_netdev(), it did not free the skb when netdev is down, and all callers of can327_feed_frame_to_netdev() did not free allocated skb too. That would trigger skb leak. Fix it by adding kfree_skb() in can327_feed_frame_to_netdev() when netdev is down. Not tested, just compiled. Fixes: 43da2f07622f ("can: can327: CAN/ldisc driver for ELM327 based OBD-II adapters") Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/all/20221110061437.411525-1-william.xuanziyang@huawei.com Reviewed-by: Max Staudt Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/can327.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index 094197780776..ed3d0b8989a0 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -263,8 +263,10 @@ static void can327_feed_frame_to_netdev(struct can327 *elm, struct sk_buff *skb) { lockdep_assert_held(&elm->lock); - if (!netif_running(elm->dev)) + if (!netif_running(elm->dev)) { + kfree_skb(skb); return; + } /* Queue for NAPI pickup. * rx-offload will update stats and LEDs for us. From 26e8f6a75248247982458e8237b98c9fb2ffcf9d Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Wed, 23 Nov 2022 08:16:36 +0100 Subject: [PATCH 712/963] can: sja1000: fix size of OCR_MODE_MASK define bitfield mode in ocr register has only 2 bits not 3, so correct the OCR_MODE_MASK define. Signed-off-by: Heiko Schocher Link: https://lore.kernel.org/all/20221123071636.2407823-1-hs@denx.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/platform/sja1000.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h index 5755ae5a4712..6a869682c120 100644 --- a/include/linux/can/platform/sja1000.h +++ b/include/linux/can/platform/sja1000.h @@ -14,7 +14,7 @@ #define OCR_MODE_TEST 0x01 #define OCR_MODE_NORMAL 0x02 #define OCR_MODE_CLOCK 0x03 -#define OCR_MODE_MASK 0x07 +#define OCR_MODE_MASK 0x03 #define OCR_TX0_INVERT 0x04 #define OCR_TX0_PULLDOWN 0x08 #define OCR_TX0_PULLUP 0x10 From 92dfd9310a71d28cefe6a2d5174d43fab240e631 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 11 Nov 2022 20:08:41 +0800 Subject: [PATCH 713/963] can: sja1000_isa: sja1000_isa_probe(): add missing free_sja1000dev() Add the missing free_sja1000dev() before return from sja1000_isa_probe() in the register_sja1000dev() error handling case. In addition, remove blanks before goto labels. Fixes: 2a6ba39ad6a2 ("can: sja1000: legacy SJA1000 ISA bus driver") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/all/1668168521-5540-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/sja1000_isa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c index d513fac50718..db3e767d5320 100644 --- a/drivers/net/can/sja1000/sja1000_isa.c +++ b/drivers/net/can/sja1000/sja1000_isa.c @@ -202,22 +202,24 @@ static int sja1000_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", DRV_NAME, err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "%s device registered (reg_base=0x%p, irq=%d)\n", DRV_NAME, priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_sja1000dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } From 62ec89e74099a3d6995988ed9f2f996b368417ec Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 11 Nov 2022 20:09:16 +0800 Subject: [PATCH 714/963] can: cc770: cc770_isa_probe(): add missing free_cc770dev() Add the missing free_cc770dev() before return from cc770_isa_probe() in the register_cc770dev() error handling case. In addition, remove blanks before goto labels. Fixes: 7e02e5433e00 ("can: cc770: legacy CC770 ISA bus driver") Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/all/1668168557-6024-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/cc770/cc770_isa.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/cc770/cc770_isa.c b/drivers/net/can/cc770/cc770_isa.c index 194c86e0f340..8f6dccd5a587 100644 --- a/drivers/net/can/cc770/cc770_isa.c +++ b/drivers/net/can/cc770/cc770_isa.c @@ -264,22 +264,24 @@ static int cc770_isa_probe(struct platform_device *pdev) if (err) { dev_err(&pdev->dev, "couldn't register device (err=%d)\n", err); - goto exit_unmap; + goto exit_free; } dev_info(&pdev->dev, "device registered (reg_base=0x%p, irq=%d)\n", priv->reg_base, dev->irq); return 0; - exit_unmap: +exit_free: + free_cc770dev(dev); +exit_unmap: if (mem[idx]) iounmap(base); - exit_release: +exit_release: if (mem[idx]) release_mem_region(mem[idx], iosize); else release_region(port[idx], iosize); - exit: +exit: return err; } From 709cb2f9ed2006eb1dc4b36b99d601cd24889ec4 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Mon, 14 Nov 2022 16:14:44 +0800 Subject: [PATCH 715/963] can: etas_es58x: es58x_init_netdev(): free netdev when register_candev() In case of register_candev() fails, clear es58x_dev->netdev[channel_idx] and add free_candev(). Otherwise es58x_free_netdevs() will unregister the netdev that has never been registered. Fixes: 8537257874e9 ("can: etas_es58x: add core support for ETAS ES58X CAN USB interfaces") Signed-off-by: Zhang Changzhong Acked-by: Arunachalam Santhanam Acked-by: Vincent Mailhol Link: https://lore.kernel.org/all/1668413685-23354-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/etas_es58x/es58x_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/etas_es58x/es58x_core.c b/drivers/net/can/usb/etas_es58x/es58x_core.c index 25f863b4f5f0..ddb7c5735c9a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_core.c +++ b/drivers/net/can/usb/etas_es58x/es58x_core.c @@ -2091,8 +2091,11 @@ static int es58x_init_netdev(struct es58x_device *es58x_dev, int channel_idx) netdev->dev_port = channel_idx; ret = register_candev(netdev); - if (ret) + if (ret) { + es58x_dev->netdev[channel_idx] = NULL; + free_candev(netdev); return ret; + } netdev_queue_set_dql_min_limit(netdev_get_tx_queue(netdev, 0), es58x_dev->param->dql_min_limit); From 1eca1d4cc21b6d0fc5f9a390339804c0afce9439 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 11 Nov 2022 20:11:23 +0800 Subject: [PATCH 716/963] can: m_can: pci: add missing m_can_class_free_dev() in probe/remove methods In m_can_pci_remove() and error handling path of m_can_pci_probe(), m_can_class_free_dev() should be called to free resource allocated by m_can_class_allocate_dev(), otherwise there will be memleak. Fixes: cab7ffc0324f ("can: m_can: add PCI glue driver for Intel Elkhart Lake") Signed-off-by: Zhang Changzhong Reviewed-by: Jarkko Nikula Link: https://lore.kernel.org/all/1668168684-6390-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can_pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can_pci.c b/drivers/net/can/m_can/m_can_pci.c index 8f184a852a0a..f2219aa2824b 100644 --- a/drivers/net/can/m_can/m_can_pci.c +++ b/drivers/net/can/m_can/m_can_pci.c @@ -120,7 +120,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) - return ret; + goto err_free_dev; mcan_class->dev = &pci->dev; mcan_class->net->irq = pci_irq_vector(pci, 0); @@ -132,7 +132,7 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) ret = m_can_class_register(mcan_class); if (ret) - goto err; + goto err_free_irq; /* Enable interrupt control at CAN wrapper IP */ writel(0x1, base + CTL_CSR_INT_CTL_OFFSET); @@ -144,8 +144,10 @@ static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) return 0; -err: +err_free_irq: pci_free_irq_vectors(pci); +err_free_dev: + m_can_class_free_dev(mcan_class->net); return ret; } @@ -161,6 +163,7 @@ static void m_can_pci_remove(struct pci_dev *pci) writel(0x0, priv->base + CTL_CSR_INT_CTL_OFFSET); m_can_class_unregister(mcan_class); + m_can_class_free_dev(mcan_class->net); pci_free_irq_vectors(pci); } From 68b4f9e0bdd0f920d7303d07bfe226cd0976961d Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 23 Nov 2022 14:36:51 +0800 Subject: [PATCH 717/963] can: m_can: Add check for devm_clk_get Since the devm_clk_get may return error, it should be better to add check for the cdev->hclk, as same as cdev->cclk. Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/all/20221123063651.26199-1-jiasheng@iscas.ac.cn Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 00d11e95fd98..e5575d2755e4 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1909,7 +1909,7 @@ int m_can_class_get_clocks(struct m_can_classdev *cdev) cdev->hclk = devm_clk_get(cdev->dev, "hclk"); cdev->cclk = devm_clk_get(cdev->dev, "cclk"); - if (IS_ERR(cdev->cclk)) { + if (IS_ERR(cdev->hclk) || IS_ERR(cdev->cclk)) { dev_err(cdev->dev, "no clock found\n"); ret = -ENODEV; } From 1a8e3bd25f1e789c8154e11ea24dc3ec5a4c1da0 Mon Sep 17 00:00:00 2001 From: Yasushi SHOJI Date: Fri, 25 Nov 2022 00:25:03 +0900 Subject: [PATCH 718/963] can: mcba_usb: Fix termination command argument Microchip USB Analyzer can activate the internal termination resistors by setting the "termination" option ON, or OFF to to deactivate them. As I've observed, both with my oscilloscope and captured USB packets below, you must send "0" to turn it ON, and "1" to turn it OFF. From the schematics in the user's guide, I can confirm that you must drive the CAN_RES signal LOW "0" to activate the resistors. Reverse the argument value of usb_msg.termination to fix this. These are the two commands sequence, ON then OFF. > No. Time Source Destination Protocol Length Info > 1 0.000000 host 1.3.1 USB 46 URB_BULK out > > Frame 1: 46 bytes on wire (368 bits), 46 bytes captured (368 bits) > USB URB > Leftover Capture Data: a80000000000000000000000000000000000a8 > > No. Time Source Destination Protocol Length Info > 2 4.372547 host 1.3.1 USB 46 URB_BULK out > > Frame 2: 46 bytes on wire (368 bits), 46 bytes captured (368 bits) > USB URB > Leftover Capture Data: a80100000000000000000000000000000000a9 Signed-off-by: Yasushi SHOJI Link: https://lore.kernel.org/all/20221124152504.125994-1-yashi@spacecubics.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 218b098b261d..47619e9cb005 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -47,6 +47,10 @@ #define MCBA_VER_REQ_USB 1 #define MCBA_VER_REQ_CAN 2 +/* Drive the CAN_RES signal LOW "0" to activate R24 and R25 */ +#define MCBA_VER_TERMINATION_ON 0 +#define MCBA_VER_TERMINATION_OFF 1 + #define MCBA_SIDL_EXID_MASK 0x8 #define MCBA_DLC_MASK 0xf #define MCBA_DLC_RTR_MASK 0x40 @@ -463,7 +467,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, priv->usb_ka_first_pass = false; } - if (msg->termination_state) + if (msg->termination_state == MCBA_VER_TERMINATION_ON) priv->can.termination = MCBA_TERMINATION_ENABLED; else priv->can.termination = MCBA_TERMINATION_DISABLED; @@ -785,9 +789,9 @@ static int mcba_set_termination(struct net_device *netdev, u16 term) }; if (term == MCBA_TERMINATION_ENABLED) - usb_msg.termination = 1; + usb_msg.termination = MCBA_VER_TERMINATION_ON; else - usb_msg.termination = 0; + usb_msg.termination = MCBA_VER_TERMINATION_OFF; mcba_usb_xmit_cmd(priv, (struct mcba_usb_msg *)&usb_msg); From cd21d99e595ec1d8721e1058dcdd4f1f7de1d793 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:40 -0500 Subject: [PATCH 719/963] wifi: wilc1000: validate pairwise and authentication suite offsets There is no validation of 'offset' which can trigger an out-of-bounds read when extracting RSN capabilities. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-2-philipturnbull@github.com --- drivers/net/wireless/microchip/wilc1000/hif.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index eb1d1ba3a443..67df8221b5ae 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -482,14 +482,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { From 051ae669e4505abbe05165bebf6be7922de11f41 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:41 -0500 Subject: [PATCH 720/963] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_OPER_CHANNEL attribute Validate that the IEEE80211_P2P_ATTR_OPER_CHANNEL attribute contains enough space for a 'struct struct wilc_attr_oper_ch'. If the attribute is too small then it triggers an out-of-bounds write later in the function. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-3-philipturnbull@github.com --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 9bbfff803357..aedf0e8b69b9 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -959,14 +959,24 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { From f9b62f9843c7b0afdaecabbcebf1dbba18599408 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:42 -0500 Subject: [PATCH 721/963] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_CHANNEL_LIST attribute Validate that the IEEE80211_P2P_ATTR_CHANNEL_LIST attribute contains enough space for a 'struct wilc_attr_oper_ch'. If the attribute is too small then it can trigger an out-of-bounds write later in the function. 'struct wilc_attr_oper_ch' is variable sized so also check 'attr_len' does not extend beyond the end of 'buf'. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-4-philipturnbull@github.com --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index aedf0e8b69b9..c4d5a272ccc0 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -967,7 +967,8 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) if (index + sizeof(*e) + attr_size > len) return; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) From 0cdfa9e6f0915e3d243e2393bfa8a22e12d553b0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:43 -0500 Subject: [PATCH 722/963] wifi: wilc1000: validate number of channels There is no validation of 'e->no_of_channels' which can trigger an out-of-bounds write in the following 'memset' call. Validate that the number of channels does not extends beyond the size of the channel list element. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-5-philipturnbull@github.com --- .../wireless/microchip/wilc1000/cfg80211.c | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index c4d5a272ccc0..b545d93c6e37 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -981,19 +981,29 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (unsigned int i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } From 10bc8e4af65946b727728d7479c028742321b60a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 17 Nov 2022 22:52:49 +0200 Subject: [PATCH 723/963] vfs: fix copy_file_range() averts filesystem freeze protection Commit 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") removed fallback to generic_copy_file_range() for cross-fs cases inside vfs_copy_file_range(). To preserve behavior of nfsd and ksmbd server-side-copy, the fallback to generic_copy_file_range() was added in nfsd and ksmbd code, but that call is missing sb_start_write(), fsnotify hooks and more. Ideally, nfsd and ksmbd would pass a flag to vfs_copy_file_range() that will take care of the fallback, but that code would be subtle and we got vfs_copy_file_range() logic wrong too many times already. Instead, add a flag to explicitly request vfs_copy_file_range() to perform only generic_copy_file_range() and let nfsd and ksmbd use this flag only in the fallback path. This choise keeps the logic changes to minimum in the non-nfsd/ksmbd code paths to reduce the risk of further regressions. Fixes: 868f9f2f8e00 ("vfs: fix copy_file_range() regression in cross-fs copies") Tested-by: Namjae Jeon Tested-by: Luis Henriques Signed-off-by: Amir Goldstein Signed-off-by: Al Viro --- fs/ksmbd/vfs.c | 6 +++--- fs/nfsd/vfs.c | 4 ++-- fs/read_write.c | 19 +++++++++++++++---- include/linux/fs.h | 8 ++++++++ 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 8de970d6146f..94b8ed4ef870 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1794,9 +1794,9 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_fp->filp, src_off, - dst_fp->filp, dst_off, - len, 0); + ret = vfs_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, len, + COPY_FILE_SPLICE); if (ret < 0) return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f650afedd67f..5cf11cde51f8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -596,8 +596,8 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src, src_pos, dst, dst_pos, - count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, + COPY_FILE_SPLICE); return ret; } diff --git a/fs/read_write.c b/fs/read_write.c index 328ce8cf9a85..24b9668d6377 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1388,6 +1388,8 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t len, unsigned int flags) { + lockdep_assert(sb_write_started(file_inode(file_out)->i_sb)); + return do_splice_direct(file_in, &pos_in, file_out, &pos_out, len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); } @@ -1424,7 +1426,9 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, * and several different sets of file_operations, but they all end up * using the same ->copy_file_range() function pointer. */ - if (file_out->f_op->copy_file_range) { + if (flags & COPY_FILE_SPLICE) { + /* cross sb splice is allowed */ + } else if (file_out->f_op->copy_file_range) { if (file_in->f_op->copy_file_range != file_out->f_op->copy_file_range) return -EXDEV; @@ -1474,8 +1478,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, size_t len, unsigned int flags) { ssize_t ret; + bool splice = flags & COPY_FILE_SPLICE; - if (flags != 0) + if (flags & ~COPY_FILE_SPLICE) return -EINVAL; ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, @@ -1501,14 +1506,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * same sb using clone, but for filesystems where both clone and copy * are supported (e.g. nfs,cifs), we only call the copy method. */ - if (file_out->f_op->copy_file_range) { + if (!splice && file_out->f_op->copy_file_range) { ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); goto done; } - if (file_in->f_op->remap_file_range && + if (!splice && file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, @@ -1528,6 +1533,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, * consistent story about which filesystems support copy_file_range() * and which filesystems do not, that will allow userspace tools to * make consistent desicions w.r.t using copy_file_range(). + * + * We also get here if caller (e.g. nfsd) requested COPY_FILE_SPLICE. */ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, flags); @@ -1582,6 +1589,10 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, pos_out = f_out.file->f_pos; } + ret = -EINVAL; + if (flags != 0) + goto out; + ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, flags); if (ret > 0) { diff --git a/include/linux/fs.h b/include/linux/fs.h index e654435f1651..59ae95ddb679 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2089,6 +2089,14 @@ struct dir_context { */ #define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN) +/* + * These flags control the behavior of vfs_copy_file_range(). + * They are not available to the user via syscall. + * + * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops + */ +#define COPY_FILE_SPLICE (1 << 0) + struct iov_iter; struct io_uring_cmd; From db58653ce0c7cf4d155727852607106f890005c0 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 21 Nov 2022 16:29:37 +0900 Subject: [PATCH 724/963] zonefs: Fix active zone accounting If a file zone transitions to the offline or readonly state from an active state, we must clear the zone active flag and decrement the active seq file counter. Do so in zonefs_account_active() using the new zonefs inode flags ZONEFS_ZONE_OFFLINE and ZONEFS_ZONE_READONLY. These flags are set if necessary in zonefs_check_zone_condition() based on the result of report zones operation after an IO error. Fixes: 87c9ce3ffec9 ("zonefs: Add active seq file accounting") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn --- fs/zonefs/super.c | 11 +++++++++++ fs/zonefs/zonefs.h | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index f0e8a000f073..2c53fbb8d918 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -40,6 +40,13 @@ static void zonefs_account_active(struct inode *inode) if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) return; + /* + * For zones that transitioned to the offline or readonly condition, + * we only need to clear the active state. + */ + if (zi->i_flags & (ZONEFS_ZONE_OFFLINE | ZONEFS_ZONE_READONLY)) + goto out; + /* * If the zone is active, that is, if it is explicitly open or * partially written, check if it was already accounted as active. @@ -53,6 +60,7 @@ static void zonefs_account_active(struct inode *inode) return; } +out: /* The zone is not active. If it was, update the active count */ if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; @@ -324,6 +332,7 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, inode->i_flags |= S_IMMUTABLE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; case BLK_ZONE_COND_READONLY: /* @@ -342,8 +351,10 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->cond = BLK_ZONE_COND_OFFLINE; inode->i_mode &= ~0777; zone->wp = zone->start; + zi->i_flags |= ZONEFS_ZONE_OFFLINE; return 0; } + zi->i_flags |= ZONEFS_ZONE_READONLY; inode->i_mode &= ~0222; return i_size_read(inode); case BLK_ZONE_COND_FULL: diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 4b3de66c3233..1dbe78119ff1 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -39,8 +39,10 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) return ZONEFS_ZTYPE_SEQ; } -#define ZONEFS_ZONE_OPEN (1 << 0) -#define ZONEFS_ZONE_ACTIVE (1 << 1) +#define ZONEFS_ZONE_OPEN (1U << 0) +#define ZONEFS_ZONE_ACTIVE (1U << 1) +#define ZONEFS_ZONE_OFFLINE (1U << 2) +#define ZONEFS_ZONE_READONLY (1U << 3) /* * In-memory inode data. From 8dbd6e4ce1b9c527921643d9e34f188a10d4e893 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 23 Nov 2022 18:06:42 +0800 Subject: [PATCH 725/963] qlcnic: fix sleep-in-atomic-context bugs caused by msleep The watchdog timer is used to monitor whether the process of transmitting data is timeout. If we use qlcnic driver, the dev_watchdog() that is the timer handler of watchdog timer will call qlcnic_tx_timeout() to process the timeout. But the qlcnic_tx_timeout() calls msleep(), as a result, the sleep-in-atomic-context bugs will happen. The processes are shown below: (atomic context) dev_watchdog qlcnic_tx_timeout qlcnic_83xx_idc_request_reset qlcnic_83xx_lock_driver msleep --------------------------- (atomic context) dev_watchdog qlcnic_tx_timeout qlcnic_83xx_idc_request_reset qlcnic_83xx_lock_driver qlcnic_83xx_recover_driver_lock msleep Fix by changing msleep() to mdelay(), the mdelay() is busy-waiting and the bugs could be mitigated. Fixes: 629263acaea3 ("qlcnic: 83xx CNA inter driver communication mechanism") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index bd0607680329..2fd5c6fdb500 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2991,7 +2991,7 @@ static void qlcnic_83xx_recover_driver_lock(struct qlcnic_adapter *adapter) QLCWRX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK, val); dev_info(&adapter->pdev->dev, "%s: lock recovery initiated\n", __func__); - msleep(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); + mdelay(QLC_83XX_DRV_LOCK_RECOVERY_DELAY); val = QLCRDX(adapter->ahw, QLC_83XX_RECOVER_DRV_LOCK); id = ((val >> 2) & 0xF); if (id == adapter->portnum) { @@ -3027,7 +3027,7 @@ int qlcnic_83xx_lock_driver(struct qlcnic_adapter *adapter) if (status) break; - msleep(QLC_83XX_DRV_LOCK_WAIT_DELAY); + mdelay(QLC_83XX_DRV_LOCK_WAIT_DELAY); i++; if (i == 1) From 39701603519e5b43a717b6db6fef9144800fdad8 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Wed, 23 Nov 2022 11:33:05 +0100 Subject: [PATCH 726/963] qed: avoid defines prefixed with CONFIG Defines prefixed with "CONFIG" should be limited to proper Kconfig options, that are introduced in a Kconfig file. Here, constants for bitmap indices of some configs are defined and these defines begin with the config's name, and are suffixed with BITMAP_IDX. To avoid defines prefixed with "CONFIG", name these constants BITMAP_IDX_FOR_CONFIG_XYZ instead of CONFIG_XYZ_BITMAP_IDX. No functional change. Signed-off-by: Lukas Bulwahn Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 24 +++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9fb1fa479d4b..16e6bd466143 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -767,34 +767,34 @@ static int qed_mcp_cancel_load_req(struct qed_hwfn *p_hwfn, return rc; } -#define CONFIG_QEDE_BITMAP_IDX BIT(0) -#define CONFIG_QED_SRIOV_BITMAP_IDX BIT(1) -#define CONFIG_QEDR_BITMAP_IDX BIT(2) -#define CONFIG_QEDF_BITMAP_IDX BIT(4) -#define CONFIG_QEDI_BITMAP_IDX BIT(5) -#define CONFIG_QED_LL2_BITMAP_IDX BIT(6) +#define BITMAP_IDX_FOR_CONFIG_QEDE BIT(0) +#define BITMAP_IDX_FOR_CONFIG_QED_SRIOV BIT(1) +#define BITMAP_IDX_FOR_CONFIG_QEDR BIT(2) +#define BITMAP_IDX_FOR_CONFIG_QEDF BIT(4) +#define BITMAP_IDX_FOR_CONFIG_QEDI BIT(5) +#define BITMAP_IDX_FOR_CONFIG_QED_LL2 BIT(6) static u32 qed_get_config_bitmap(void) { u32 config_bitmap = 0x0; if (IS_ENABLED(CONFIG_QEDE)) - config_bitmap |= CONFIG_QEDE_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDE; if (IS_ENABLED(CONFIG_QED_SRIOV)) - config_bitmap |= CONFIG_QED_SRIOV_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_SRIOV; if (IS_ENABLED(CONFIG_QED_RDMA)) - config_bitmap |= CONFIG_QEDR_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDR; if (IS_ENABLED(CONFIG_QED_FCOE)) - config_bitmap |= CONFIG_QEDF_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDF; if (IS_ENABLED(CONFIG_QED_ISCSI)) - config_bitmap |= CONFIG_QEDI_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QEDI; if (IS_ENABLED(CONFIG_QED_LL2)) - config_bitmap |= CONFIG_QED_LL2_BITMAP_IDX; + config_bitmap |= BITMAP_IDX_FOR_CONFIG_QED_LL2; return config_bitmap; } From 2a83891130512dafb321418a8e7c9c09268d8c59 Mon Sep 17 00:00:00 2001 From: Izabela Bakollari Date: Wed, 23 Nov 2022 11:10:08 +0100 Subject: [PATCH 727/963] aquantia: Do not purge addresses when setting the number of rings IPV6 addresses are purged when setting the number of rx/tx rings using ethtool -G. The function aq_set_ringparam calls dev_close, which removes the addresses. As a solution, call an internal function (aq_ndev_close). Fixes: c1af5427954b ("net: aquantia: Ethtool based ring size configuration") Signed-off-by: Izabela Bakollari Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c | 5 +++-- drivers/net/ethernet/aquantia/atlantic/aq_main.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_main.h | 2 ++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a08f221e30d4..ac4ea93bd8dd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -13,6 +13,7 @@ #include "aq_ptp.h" #include "aq_filters.h" #include "aq_macsec.h" +#include "aq_main.h" #include @@ -858,7 +859,7 @@ static int aq_set_ringparam(struct net_device *ndev, if (netif_running(ndev)) { ndev_running = true; - dev_close(ndev); + aq_ndev_close(ndev); } cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); @@ -874,7 +875,7 @@ static int aq_set_ringparam(struct net_device *ndev, goto err_exit; if (ndev_running) - err = dev_open(ndev, NULL); + err = aq_ndev_open(ndev); err_exit: return err; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 8a0af371e7dc..77609dc0a08d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -58,7 +58,7 @@ struct net_device *aq_ndev_alloc(void) return ndev; } -static int aq_ndev_open(struct net_device *ndev) +int aq_ndev_open(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; @@ -88,7 +88,7 @@ static int aq_ndev_open(struct net_device *ndev) return err; } -static int aq_ndev_close(struct net_device *ndev) +int aq_ndev_close(struct net_device *ndev) { struct aq_nic_s *aq_nic = netdev_priv(ndev); int err = 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.h b/drivers/net/ethernet/aquantia/atlantic/aq_main.h index 99870865f66d..a78c1a168d8e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.h @@ -16,5 +16,7 @@ DECLARE_STATIC_KEY_FALSE(aq_xdp_locking_key); void aq_ndev_schedule_work(struct work_struct *work); struct net_device *aq_ndev_alloc(void); +int aq_ndev_open(struct net_device *ndev); +int aq_ndev_close(struct net_device *ndev); #endif /* AQ_MAIN_H */ From cc3d2b5fc0d6f8ad8a52da5ea679e5c2ec2adbd4 Mon Sep 17 00:00:00 2001 From: "Goh, Wei Sheng" Date: Wed, 23 Nov 2022 18:51:10 +0800 Subject: [PATCH 728/963] net: stmmac: Set MAC's flow control register to reflect current settings Currently, pause frame register GMAC_RX_FLOW_CTRL_RFE is not updated correctly when 'ethtool -A autoneg off rx off tx off' command is issued. This fix ensures the flow control change is reflected directly in the GMAC_RX_FLOW_CTRL_RFE register. Fixes: 46f69ded988d ("net: stmmac: Use resolved link config in mac_link_up()") Cc: # 5.10.x Signed-off-by: Goh, Wei Sheng Signed-off-by: Noor Azura Ahmad Tarmizi Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25bfecb4a2d..e5cfde1cbd5c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -748,6 +748,8 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; + } else { + pr_debug("\tReceive Flow-Control OFF\n"); } writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6b43da78cdf0..23ec0a9e396c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1061,8 +1061,16 @@ static void stmmac_mac_link_up(struct phylink_config *config, ctrl |= priv->hw->link.duplex; /* Flow Control operation */ - if (tx_pause && rx_pause) - stmmac_mac_flow_ctrl(priv, duplex); + if (rx_pause && tx_pause) + priv->flow_ctrl = FLOW_AUTO; + else if (rx_pause && !tx_pause) + priv->flow_ctrl = FLOW_RX; + else if (!rx_pause && tx_pause) + priv->flow_ctrl = FLOW_TX; + else + priv->flow_ctrl = FLOW_OFF; + + stmmac_mac_flow_ctrl(priv, duplex); if (ctrl != old_ctrl) writel(ctrl, priv->ioaddr + MAC_CTRL_REG); From 32b931c86d0aef4f3263de457f58d82e9cbae2a2 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Wed, 23 Nov 2022 16:29:38 +0530 Subject: [PATCH 729/963] octeontx2-pf: Fix pfc_alloc_status array overflow This patch addresses pfc_alloc_status array overflow occurring for send queue index value greater than PFC priority. Queue index can be greater than supported PFC priority for multiple scenarios (e.g. QoS, during non zero SMQ allocation for a PF/VF). In those scenarios the API should return default tx scheduler '0'. This is causing mbox errors as otx2_get_smq_idx returing invalid smq value. Fixes: 99c969a83d82 ("octeontx2-pf: Add egress PFC support") Signed-off-by: Suman Ghosh Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 282db6fe3b08..67aa02bb2b85 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -884,7 +884,7 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { #ifdef CONFIG_DCB - if (pfvf->pfc_alloc_status[qidx]) + if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif From df727d4547de568302b0ed15b0d4e8a469bdb456 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Nov 2022 14:38:52 +0100 Subject: [PATCH 730/963] net: fec: don't reset irq coalesce settings to defaults on "ip link up" Currently, when a FEC device is brought up, the irq coalesce settings are reset to their default values (1000us, 200 frames). That's unexpected, and breaks for example use of an appropriate .link file to make systemd-udev apply the desired settings (https://www.freedesktop.org/software/systemd/man/systemd.link.html), or any other method that would do a one-time setup during early boot. Refactor the code so that fec_restart() instead uses fec_enet_itr_coal_set(), which simply applies the settings that are stored in the private data, and initialize that private data with the default values. Signed-off-by: Rasmus Villemoes Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index f623c12eaf95..2ca2b61b451f 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -74,7 +74,7 @@ #include "fec.h" static void set_multicast_list(struct net_device *ndev); -static void fec_enet_itr_coal_init(struct net_device *ndev); +static void fec_enet_itr_coal_set(struct net_device *ndev); #define DRIVER_NAME "fec" @@ -1220,8 +1220,7 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_init(ndev); - + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) @@ -2856,19 +2855,6 @@ static int fec_enet_set_coalesce(struct net_device *ndev, return 0; } -static void fec_enet_itr_coal_init(struct net_device *ndev) -{ - struct ethtool_coalesce ec; - - ec.rx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.rx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - ec.tx_coalesce_usecs = FEC_ITR_ICTT_DEFAULT; - ec.tx_max_coalesced_frames = FEC_ITR_ICFT_DEFAULT; - - fec_enet_set_coalesce(ndev, &ec, NULL, NULL); -} - static int fec_enet_get_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, void *data) @@ -3623,6 +3609,10 @@ static int fec_enet_init(struct net_device *ndev) fep->rx_align = 0x3; fep->tx_align = 0x3; #endif + fep->rx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->tx_pkts_itr = FEC_ITR_ICFT_DEFAULT; + fep->rx_time_itr = FEC_ITR_ICTT_DEFAULT; + fep->tx_time_itr = FEC_ITR_ICTT_DEFAULT; /* Check mask of the streaming and coherent API */ ret = dma_set_mask_and_coherent(&fep->pdev->dev, DMA_BIT_MASK(32)); From 31d929de5a112ee1b977a89c57de74710894bbbf Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 23 Nov 2022 15:18:28 +0100 Subject: [PATCH 731/963] net: loopback: use NET_NAME_PREDICTABLE for name_assign_type When the name_assign_type attribute was introduced (commit 685343fc3ba6, "net: add name_assign_type netdev attribute"), the loopback device was explicitly mentioned as one which would make use of NET_NAME_PREDICTABLE: The name_assign_type attribute gives hints where the interface name of a given net-device comes from. These values are currently defined: ... NET_NAME_PREDICTABLE: The ifname has been assigned by the kernel in a predictable way that is guaranteed to avoid reuse and always be the same for a given device. Examples include statically created devices like the loopback device [...] Switch to that so that reading /sys/class/net/lo/name_assign_type produces something sensible instead of returning -EINVAL. Signed-off-by: Rasmus Villemoes Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/loopback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 14e8d04cb434..2e9742952c4e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -211,7 +211,7 @@ static __net_init int loopback_net_init(struct net *net) int err; err = -ENOMEM; - dev = alloc_netdev(0, "lo", NET_NAME_UNKNOWN, loopback_setup); + dev = alloc_netdev(0, "lo", NET_NAME_PREDICTABLE, loopback_setup); if (!dev) goto out; From f4307b4df1c28842bb1950ff0e1b97e17031b17f Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Wed, 23 Nov 2022 17:55:06 +0800 Subject: [PATCH 732/963] mmc: mmc_test: Fix removal of debugfs file In __mmc_test_register_dbgfs_file(), we need to assign 'file', as it's being used when removing the debugfs files when the mmc_test module is removed. Fixes: a04c50aaa916 ("mmc: core: no need to check return value of debugfs_create functions") Signed-off-by: Ye Bin Acked-by: Adrian Hunter Cc: stable@vger.kernel.org [Ulf: Re-wrote the commit msg] Link: https://lore.kernel.org/r/20221123095506.1965691-1-yebin@huaweicloud.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { From 9f16b5c82a025cd4c864737409234ddc44fb166a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Nov 2022 12:36:57 +0100 Subject: [PATCH 733/963] wifi: cfg80211: fix buffer overflow in elem comparison MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For vendor elements, the code here assumes that 5 octets are present without checking. Since the element itself is already checked to fit, we only need to check the length. Reported-and-tested-by: Sönke Huster Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Johannes Berg --- net/wireless/scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index da752b0cc752..4d217798890a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -330,7 +330,8 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, * determine if they are the same ie. */ if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (!memcmp(tmp_old + 2, tmp + 2, 5)) { + if (tmp_old[1] >= 5 && tmp[1] >= 5 && + !memcmp(tmp_old + 2, tmp + 2, 5)) { /* same vendor ie, copy from * subelement */ From acd3c92acc7aaec50a94d0a7faf7ccd74e952493 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Nov 2022 12:36:58 +0100 Subject: [PATCH 734/963] wifi: cfg80211: don't allow multi-BSSID in S1G MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In S1G beacon frames there shouldn't be multi-BSSID elements since that's not supported, remove that to avoid a potential integer underflow and/or misparsing the frames due to the different length of the fixed part of the frame. While at it, initialize non_tx_data so we don't send garbage values to the user (even if it doesn't seem to matter now.) Reported-and-tested-by: Sönke Huster Fixes: 9eaffe5078ca ("cfg80211: convert S1G beacon to scan results") Signed-off-by: Johannes Berg --- net/wireless/scan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4d217798890a..3d86482e83f5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2527,10 +2527,15 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, const struct cfg80211_bss_ies *ies1, *ies2; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - struct cfg80211_non_tx_bss non_tx_data; + struct cfg80211_non_tx_bss non_tx_data = {}; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, len, gfp); + + /* don't do any further MBSSID handling for S1G */ + if (ieee80211_is_s1g_beacon(mgmt->frame_control)) + return res; + if (!res || !wiphy->support_mbssid || !cfg80211_find_elem(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; From 3e8f7abcc3473bc9603323803aeaed4ffcc3a2ab Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Nov 2022 16:19:26 +0100 Subject: [PATCH 735/963] wifi: mac8021: fix possible oob access in ieee80211_get_rate_duration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix possible out-of-bound access in ieee80211_get_rate_duration routine as reported by the following UBSAN report: UBSAN: array-index-out-of-bounds in net/mac80211/airtime.c:455:47 index 15 is out of range for type 'u16 [12]' CPU: 2 PID: 217 Comm: kworker/u32:10 Not tainted 6.1.0-060100rc3-generic Hardware name: Acer Aspire TC-281/Aspire TC-281, BIOS R01-A2 07/18/2017 Workqueue: mt76 mt76u_tx_status_data [mt76_usb] Call Trace: show_stack+0x4e/0x61 dump_stack_lvl+0x4a/0x6f dump_stack+0x10/0x18 ubsan_epilogue+0x9/0x43 __ubsan_handle_out_of_bounds.cold+0x42/0x47 ieee80211_get_rate_duration.constprop.0+0x22f/0x2a0 [mac80211] ? ieee80211_tx_status_ext+0x32e/0x640 [mac80211] ieee80211_calc_rx_airtime+0xda/0x120 [mac80211] ieee80211_calc_tx_airtime+0xb4/0x100 [mac80211] mt76x02_send_tx_status+0x266/0x480 [mt76x02_lib] mt76x02_tx_status_data+0x52/0x80 [mt76x02_lib] mt76u_tx_status_data+0x67/0xd0 [mt76_usb] process_one_work+0x225/0x400 worker_thread+0x50/0x3e0 ? process_one_work+0x400/0x400 kthread+0xe9/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Fixes: db3e1c40cf2f ("mac80211: Import airtime calculation code from mt76") Signed-off-by: Lorenzo Bianconi Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/airtime.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 2e66598fac79..e8ebd343e2bf 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -452,6 +452,9 @@ static u32 ieee80211_get_rate_duration(struct ieee80211_hw *hw, (status->encoding == RX_ENC_HE && streams > 8))) return 0; + if (idx >= MCS_GROUP_RATES) + return 0; + duration = airtime_mcs_groups[group].duration[idx]; duration <<= airtime_mcs_groups[group].shift; *overhead = 36 + (streams << 2); From 2e7ec190a0e38aaa8a6d87fd5f804ec07947febc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 25 Nov 2022 22:34:42 +1100 Subject: [PATCH 736/963] powerpc/64s: Add missing declaration for machine_check_early_boot() There's no declaration for machine_check_early_boot(), which leads to a build failure with W=1. Add one. Fixes: 2f5182cffa43 ("powerpc/64s: early boot machine check handler") Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20221125132521.2167039-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/interrupt.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..6d8492b6e2b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot); DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); From 2f3893437a4ebf2e892ca172e9e122841319d675 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Nov 2022 16:57:41 +0000 Subject: [PATCH 737/963] io_uring: cmpxchg for poll arm refs release Replace atomically substracting the ownership reference at the end of arming a poll with a cmpxchg. We try to release ownership by setting 0 assuming that poll_refs didn't change while we were arming. If it did change, we keep the ownership and use it to queue a tw, which is fully capable to process all events and (even tolerates spurious wake ups). It's a bit more elegant as we reduce races b/w setting the cancellation flag and getting refs with this release, and with that we don't have to worry about any kinds of underflows. It's not the fastest path for polling. The performance difference b/w cmpxchg and atomic dec is usually negligible and it's not the fastest path. Cc: stable@vger.kernel.org Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0c95251624397ea6def568ff040cad2d7926fd51.1668963050.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/poll.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index 055632e9092a..1b78b527075d 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -518,7 +518,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - int v; INIT_HLIST_NODE(&req->hash_node); req->work.cancel_seq = atomic_read(&ctx->cancel_seq); @@ -586,11 +585,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (ipt->owning) { /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. + * Try to release ownership. If we see a change of state, e.g. + * poll was waken up, queue up a tw, it'll deal with it. */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) + if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) __io_poll_execute(req, 0); } return 0; From a26a35e9019fd70bf3cf647dcfdae87abc7bacea Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 20 Nov 2022 16:57:42 +0000 Subject: [PATCH 738/963] io_uring: make poll refs more robust poll_refs carry two functions, the first is ownership over the request. The second is notifying the io_poll_check_events() that there was an event but wake up couldn't grab the ownership, so io_poll_check_events() should retry. We want to make poll_refs more robust against overflows. Instead of always incrementing it, which covers two purposes with one atomic, check if poll_refs is elevated enough and if so set a retry flag without attempts to grab ownership. The gap between the bias check and following atomics may seem racy, but we don't need it to be strict. Moreover there might only be maximum 4 parallel updates: by the first and the second poll entries, __io_arm_poll_handler() and cancellation. From those four, only poll wake ups may be executed multiple times, but they're protected by a spin. Cc: stable@vger.kernel.org Reported-by: Lin Ma Fixes: aa43477b04025 ("io_uring: poll rework") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/c762bc31f8683b3270f3587691348a7119ef9c9d.1668963050.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/poll.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index 1b78b527075d..b444b7d87697 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -40,7 +40,14 @@ struct io_poll_table { }; #define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) +#define IO_POLL_RETRY_FLAG BIT(30) +#define IO_POLL_REF_MASK GENMASK(29, 0) + +/* + * We usually have 1-2 refs taken, 128 is more than enough and we want to + * maximise the margin between this amount and the moment when it overflows. + */ +#define IO_POLL_REF_BIAS 128 #define IO_WQE_F_DOUBLE 1 @@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) return priv & IO_WQE_F_DOUBLE; } +static bool io_poll_get_ownership_slowpath(struct io_kiocb *req) +{ + int v; + + /* + * poll_refs are already elevated and we don't have much hope for + * grabbing the ownership. Instead of incrementing set a retry flag + * to notify the loop that there might have been some change. + */ + v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs); + if (v & IO_POLL_REF_MASK) + return false; + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + /* * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can * bump it and acquire ownership. It's disallowed to modify requests while not @@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe) */ static inline bool io_poll_get_ownership(struct io_kiocb *req) { + if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS)) + return io_poll_get_ownership_slowpath(req); return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); } @@ -235,6 +259,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) */ if ((v & IO_POLL_REF_MASK) != 1) req->cqe.res = 0; + if (v & IO_POLL_RETRY_FLAG) { + req->cqe.res = 0; + /* + * We won't find new events that came in between + * vfs_poll and the ref put unless we clear the flag + * in advance. + */ + atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); + v &= ~IO_POLL_RETRY_FLAG; + } /* the mask was stashed in __io_poll_execute */ if (!req->cqe.res) { From 9d94c04c0db024922e886c9fd429659f22f48ea4 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Wed, 23 Nov 2022 02:40:15 +0800 Subject: [PATCH 739/963] io_uring/filetable: fix file reference underflow There is an interesting reference bug when -ENOMEM occurs in calling of io_install_fixed_file(). KASan report like below: [ 14.057131] ================================================================== [ 14.059161] BUG: KASAN: use-after-free in unix_get_socket+0x10/0x90 [ 14.060975] Read of size 8 at addr ffff88800b09cf20 by task kworker/u8:2/45 [ 14.062684] [ 14.062768] CPU: 2 PID: 45 Comm: kworker/u8:2 Not tainted 6.1.0-rc4 #1 [ 14.063099] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 [ 14.063666] Workqueue: events_unbound io_ring_exit_work [ 14.063936] Call Trace: [ 14.064065] [ 14.064175] dump_stack_lvl+0x34/0x48 [ 14.064360] print_report+0x172/0x475 [ 14.064547] ? _raw_spin_lock_irq+0x83/0xe0 [ 14.064758] ? __virt_addr_valid+0xef/0x170 [ 14.064975] ? unix_get_socket+0x10/0x90 [ 14.065167] kasan_report+0xad/0x130 [ 14.065353] ? unix_get_socket+0x10/0x90 [ 14.065553] unix_get_socket+0x10/0x90 [ 14.065744] __io_sqe_files_unregister+0x87/0x1e0 [ 14.065989] ? io_rsrc_refs_drop+0x1c/0xd0 [ 14.066199] io_ring_exit_work+0x388/0x6a5 [ 14.066410] ? io_uring_try_cancel_requests+0x5bf/0x5bf [ 14.066674] ? try_to_wake_up+0xdb/0x910 [ 14.066873] ? virt_to_head_page+0xbe/0xbe [ 14.067080] ? __schedule+0x574/0xd20 [ 14.067273] ? read_word_at_a_time+0xe/0x20 [ 14.067492] ? strscpy+0xb5/0x190 [ 14.067665] process_one_work+0x423/0x710 [ 14.067879] worker_thread+0x2a2/0x6f0 [ 14.068073] ? process_one_work+0x710/0x710 [ 14.068284] kthread+0x163/0x1a0 [ 14.068454] ? kthread_complete_and_exit+0x20/0x20 [ 14.068697] ret_from_fork+0x22/0x30 [ 14.068886] [ 14.069000] [ 14.069088] Allocated by task 289: [ 14.069269] kasan_save_stack+0x1e/0x40 [ 14.069463] kasan_set_track+0x21/0x30 [ 14.069652] __kasan_slab_alloc+0x58/0x70 [ 14.069899] kmem_cache_alloc+0xc5/0x200 [ 14.070100] __alloc_file+0x20/0x160 [ 14.070283] alloc_empty_file+0x3b/0xc0 [ 14.070479] path_openat+0xc3/0x1770 [ 14.070689] do_filp_open+0x150/0x270 [ 14.070888] do_sys_openat2+0x113/0x270 [ 14.071081] __x64_sys_openat+0xc8/0x140 [ 14.071283] do_syscall_64+0x3b/0x90 [ 14.071466] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.071791] [ 14.071874] Freed by task 0: [ 14.072027] kasan_save_stack+0x1e/0x40 [ 14.072224] kasan_set_track+0x21/0x30 [ 14.072415] kasan_save_free_info+0x2a/0x50 [ 14.072627] __kasan_slab_free+0x106/0x190 [ 14.072858] kmem_cache_free+0x98/0x340 [ 14.073075] rcu_core+0x427/0xe50 [ 14.073249] __do_softirq+0x110/0x3cd [ 14.073440] [ 14.073523] Last potentially related work creation: [ 14.073801] kasan_save_stack+0x1e/0x40 [ 14.074017] __kasan_record_aux_stack+0x97/0xb0 [ 14.074264] call_rcu+0x41/0x550 [ 14.074436] task_work_run+0xf4/0x170 [ 14.074619] exit_to_user_mode_prepare+0x113/0x120 [ 14.074858] syscall_exit_to_user_mode+0x1d/0x40 [ 14.075092] do_syscall_64+0x48/0x90 [ 14.075272] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.075529] [ 14.075612] Second to last potentially related work creation: [ 14.075900] kasan_save_stack+0x1e/0x40 [ 14.076098] __kasan_record_aux_stack+0x97/0xb0 [ 14.076325] task_work_add+0x72/0x1b0 [ 14.076512] fput+0x65/0xc0 [ 14.076657] filp_close+0x8e/0xa0 [ 14.076825] __x64_sys_close+0x15/0x50 [ 14.077019] do_syscall_64+0x3b/0x90 [ 14.077199] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 14.077448] [ 14.077530] The buggy address belongs to the object at ffff88800b09cf00 [ 14.077530] which belongs to the cache filp of size 232 [ 14.078105] The buggy address is located 32 bytes inside of [ 14.078105] 232-byte region [ffff88800b09cf00, ffff88800b09cfe8) [ 14.078685] [ 14.078771] The buggy address belongs to the physical page: [ 14.079046] page:000000001bd520e7 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88800b09de00 pfn:0xb09c [ 14.079575] head:000000001bd520e7 order:1 compound_mapcount:0 compound_pincount:0 [ 14.079946] flags: 0x100000000010200(slab|head|node=0|zone=1) [ 14.080244] raw: 0100000000010200 0000000000000000 dead000000000001 ffff88800493cc80 [ 14.080629] raw: ffff88800b09de00 0000000080190018 00000001ffffffff 0000000000000000 [ 14.081016] page dumped because: kasan: bad access detected [ 14.081293] [ 14.081376] Memory state around the buggy address: [ 14.081618] ffff88800b09ce00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 14.081974] ffff88800b09ce80: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 14.082336] >ffff88800b09cf00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 14.082690] ^ [ 14.082909] ffff88800b09cf80: fb fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc [ 14.083266] ffff88800b09d000: fc fc fc fc fc fc fc fc fa fb fb fb fb fb fb fb [ 14.083622] ================================================================== The actual tracing of this bug is shown below: commit 8c71fe750215 ("io_uring: ensure fput() called correspondingly when direct install fails") adds an additional fput() in io_fixed_fd_install() when io_file_bitmap_get() returns error values. In that case, the routine will never make it to io_install_fixed_file() due to an early return. static int io_fixed_fd_install(...) { if (alloc_slot) { ... ret = io_file_bitmap_get(ctx); if (unlikely(ret < 0)) { io_ring_submit_unlock(ctx, issue_flags); fput(file); return ret; } ... } ... ret = io_install_fixed_file(req, file, issue_flags, file_slot); ... } In the above scenario, the reference is okay as io_fixed_fd_install() ensures the fput() is called when something bad happens, either via bitmap or via inner io_install_fixed_file(). However, the commit 61c1b44a21d7 ("io_uring: fix deadlock on iowq file slot alloc") breaks the balance because it places fput() into the common path for both io_file_bitmap_get() and io_install_fixed_file(). Since io_install_fixed_file() handles the fput() itself, the reference underflow come across then. There are some extra commits make the current code into io_fixed_fd_install() -> __io_fixed_fd_install() -> io_install_fixed_file() However, the fact that there is an extra fput() is called if io_install_fixed_file() calls fput(). Traversing through the code, I find that the existing two callers to __io_fixed_fd_install(): io_fixed_fd_install() and io_msg_send_fd() have fput() when handling error return, this patch simply removes the fput() in io_install_fixed_file() to fix the bug. Fixes: 61c1b44a21d7 ("io_uring: fix deadlock on iowq file slot alloc") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/be4ba4b.5d44.184a0a406a4.Coremail.linma@zju.edu.cn Signed-off-by: Jens Axboe --- io_uring/filetable.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 7b473259f3f4..68dfc6936aa7 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); return ret; } From 12ad3d2d6c5b0131a6052de91360849e3e154846 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 25 Nov 2022 07:15:54 -0700 Subject: [PATCH 740/963] io_uring/poll: fix poll_refs race with cancelation There is an interesting race condition of poll_refs which could result in a NULL pointer dereference. The crash trace is like: KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] CPU: 0 PID: 30781 Comm: syz-executor.2 Not tainted 6.0.0-g493ffd6605b2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:io_poll_remove_entry io_uring/poll.c:154 [inline] RIP: 0010:io_poll_remove_entries+0x171/0x5b4 io_uring/poll.c:190 Code: ... RSP: 0018:ffff88810dfefba0 EFLAGS: 00010202 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000040000 RDX: ffffc900030c4000 RSI: 000000000003ffff RDI: 0000000000040000 RBP: 0000000000000008 R08: ffffffff9764d3dd R09: fffffbfff3836781 R10: fffffbfff3836781 R11: 0000000000000000 R12: 1ffff11003422d60 R13: ffff88801a116b04 R14: ffff88801a116ac0 R15: dffffc0000000000 FS: 00007f9c07497700(0000) GS:ffff88811a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffb5c00ea98 CR3: 0000000105680005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: io_apoll_task_func+0x3f/0xa0 io_uring/poll.c:299 handle_tw_list io_uring/io_uring.c:1037 [inline] tctx_task_work+0x37e/0x4f0 io_uring/io_uring.c:1090 task_work_run+0x13a/0x1b0 kernel/task_work.c:177 get_signal+0x2402/0x25a0 kernel/signal.c:2635 arch_do_signal_or_restart+0x3b/0x660 arch/x86/kernel/signal.c:869 exit_to_user_mode_loop kernel/entry/common.c:166 [inline] exit_to_user_mode_prepare+0xc2/0x160 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:283 [inline] syscall_exit_to_user_mode+0x58/0x160 kernel/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x63/0xcd The root cause for this is a tiny overlooking in io_poll_check_events() when cocurrently run with poll cancel routine io_poll_cancel_req(). The interleaving to trigger use-after-free: CPU0 | CPU1 | io_apoll_task_func() | io_poll_cancel_req() io_poll_check_events() | // do while first loop | v = atomic_read(...) | // v = poll_refs = 1 | ... | io_poll_mark_cancelled() | atomic_or() | // poll_refs = IO_POLL_CANCEL_FLAG | 1 | atomic_sub_return(...) | // poll_refs = IO_POLL_CANCEL_FLAG | // loop continue | | | io_poll_execute() | io_poll_get_ownership() | // poll_refs = IO_POLL_CANCEL_FLAG | 1 | // gets the ownership v = atomic_read(...) | // poll_refs not change | | if (v & IO_POLL_CANCEL_FLAG) | return -ECANCELED; | // io_poll_check_events return | // will go into | // io_req_complete_failed() free req | | | io_apoll_task_func() | // also go into io_req_complete_failed() And the interleaving to trigger the kernel WARNING: CPU0 | CPU1 | io_apoll_task_func() | io_poll_cancel_req() io_poll_check_events() | // do while first loop | v = atomic_read(...) | // v = poll_refs = 1 | ... | io_poll_mark_cancelled() | atomic_or() | // poll_refs = IO_POLL_CANCEL_FLAG | 1 | atomic_sub_return(...) | // poll_refs = IO_POLL_CANCEL_FLAG | // loop continue | | v = atomic_read(...) | // v = IO_POLL_CANCEL_FLAG | | io_poll_execute() | io_poll_get_ownership() | // poll_refs = IO_POLL_CANCEL_FLAG | 1 | // gets the ownership | WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) | // v & IO_POLL_REF_MASK = 0 WARN | | | io_apoll_task_func() | // also go into io_req_complete_failed() By looking up the source code and communicating with Pavel, the implementation of this atomic poll refs should continue the loop of io_poll_check_events() just to avoid somewhere else to grab the ownership. Therefore, this patch simply adds another AND operation to make sure the loop will stop if it finds the poll_refs is exactly equal to IO_POLL_CANCEL_FLAG. Since io_poll_cancel_req() grabs ownership and will finally make its way to io_req_complete_failed(), the req will be reclaimed as expected. Fixes: aa43477b0402 ("io_uring: poll rework") Signed-off-by: Lin Ma Reviewed-by: Pavel Begunkov [axboe: tweak description and code style] Signed-off-by: Jens Axboe --- io_uring/poll.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index b444b7d87697..d9bf1767867e 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -308,7 +308,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) * Release all references, retry if someone tried to restart * task_work while we were executing it. */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & + IO_POLL_REF_MASK); return IOU_POLL_NO_ACTION; } From f33bcc506050f89433a52a3052054d4ebd37b1c1 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 16:23:47 +0000 Subject: [PATCH 741/963] ASoC: ops: Correct bounds check for second channel on SX controls Currently the check against the max value for the control is being applied after the value has had the minimum applied and been masked. But the max value simply indicates the number of volume levels on an SX control, and as such should just be applied on the raw value. Fixes: 97eea946b939 ("ASoC: ops: Check bounds for second channel in snd_soc_put_volsw_sx()") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125162348.1288005-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/soc-ops.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 1970bda074d8..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -464,16 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; - - val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + unsigned int val2 = ucontrol->value.integer.value[1]; if (mc->platform_max && val2 > mc->platform_max) return -EINVAL; if (val2 > max) return -EINVAL; + val_mask = mask << rshift; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, From 3d1bb6cc1a654c8693a85b1d262e610196edec8b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 16:23:48 +0000 Subject: [PATCH 742/963] ASoC: cs42l51: Correct PGA Volume minimum value The table in the datasheet actually shows the volume values in the wrong order, with the two -3dB values being reversed. This appears to have caused the lower of the two values to be used in the driver when the higher should have been, correct this mixup. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125162348.1288005-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l51.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), From 7cfe7a09489c1cefee7181e07b5f2bcbaebd9f41 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Nov 2022 09:36:29 -0700 Subject: [PATCH 743/963] io_uring: clear TIF_NOTIFY_SIGNAL if set and task_work not available With how task_work is added and signaled, we can have TIF_NOTIFY_SIGNAL set and no task_work pending as it got run in a previous loop. Treat TIF_NOTIFY_SIGNAL like get_signal(), always clear it if set regardless of whether or not task_work is pending to run. Cc: stable@vger.kernel.org Fixes: 46a525e199e4 ("io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL") Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index cef5ff924e63..50bc3af44953 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline int io_run_task_work(void) { + /* + * Always check-and-clear the task_work notification signal. With how + * signaling works for task_work, we can find it set with nothing to + * run. We need to clear it for that case, like get_signal() does. + */ + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + clear_notify_signal(); if (task_work_pending(current)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - clear_notify_signal(); __set_current_state(TASK_RUNNING); task_work_run(); return 1; From 869e4ae4cd2a23d625aaa14ae62dbebf768cb77d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2022 19:20:53 -0800 Subject: [PATCH 744/963] nios2: add FORCE for vmlinuz.gz Add FORCE to placate a warning from make: arch/nios2/boot/Makefile:24: FORCE prerequisite is missing Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Randy Dunlap Reviewed-by: Masahiro Yamada --- arch/nios2/boot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile index 8c3ad76602f3..29c11a06b750 100644 --- a/arch/nios2/boot/Makefile +++ b/arch/nios2/boot/Makefile @@ -20,7 +20,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) -$(obj)/vmImage: $(obj)/vmlinux.gz +$(obj)/vmImage: $(obj)/vmlinux.gz FORCE $(call if_changed,uimage) @$(kecho) 'Kernel: $@ is ready' From 369eb2c9f1f72adbe91e0ea8efb130f0a2ba11a6 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 23 Nov 2022 21:28:08 +0800 Subject: [PATCH 745/963] net: phy: fix null-ptr-deref while probe() failed I got a null-ptr-deref report as following when doing fault injection test: BUG: kernel NULL pointer dereference, address: 0000000000000058 Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 PID: 253 Comm: 507-spi-dm9051 Tainted: G B N 6.1.0-rc3+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:klist_put+0x2d/0xd0 Call Trace: klist_remove+0xf1/0x1c0 device_release_driver_internal+0x23e/0x2d0 bus_remove_device+0x1bd/0x240 device_del+0x357/0x770 phy_device_remove+0x11/0x30 mdiobus_unregister+0xa5/0x140 release_nodes+0x6a/0xa0 devres_release_all+0xf8/0x150 device_unbind_cleanup+0x19/0xd0 //probe path: phy_device_register() device_add() phy_connect phy_attach_direct() //set device driver probe() //it's failed, driver is not bound device_bind_driver() // probe failed, it's not called //remove path: phy_device_remove() device_del() device_release_driver_internal() __device_release_driver() //dev->drv is not NULL klist_remove() <- knode_driver is not added yet, cause null-ptr-deref In phy_attach_direct(), after setting the 'dev->driver', probe() fails, device_bind_driver() is not called, so the knode_driver->n_klist is not set, then it causes null-ptr-deref in __device_release_driver() while deleting device. Fix this by setting dev->driver to NULL in the error path in phy_attach_direct(). Fixes: e13934563db0 ("[PATCH] PHY Layer fixup") Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 57849ac0384e..a72adabfd2a7 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1520,6 +1520,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, error_module_put: module_put(d->driver->owner); + d->driver = NULL; error_put_device: put_device(d); if (ndev_owner != bus->owner) From b7b275e60bcd5f89771e865a8239325f86d9927d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Nov 2022 13:31:48 -0800 Subject: [PATCH 746/963] Linux 6.1-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6f846b1f2618..78525ebea876 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From ec851b23084b3a0af8bf0f5e51d33a8d678bdc49 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Fri, 25 Nov 2022 22:07:57 +0100 Subject: [PATCH 747/963] gpiolib: fix memory leak in gpiochip_setup_dev() Here is a backtrace report about memory leak detected in gpiochip_setup_dev(): unreferenced object 0xffff88810b406400 (size 512): comm "python3", pid 1682, jiffies 4295346908 (age 24.090s) backtrace: kmalloc_trace device_add device_private_init at drivers/base/core.c:3361 (inlined by) device_add at drivers/base/core.c:3411 cdev_device_add gpiolib_cdev_register gpiochip_setup_dev gpiochip_add_data_with_key gcdev_register() & gcdev_unregister() would call device_add() & device_del() (no matter CONFIG_GPIO_CDEV is enabled or not) to register/unregister device. However, if device_add() succeeds, some resource (like struct device_private allocated by device_private_init()) is not released by device_del(). Therefore, after device_add() succeeds by gcdev_register(), it needs to call put_device() to release resource in the error handle path. Here we move forward the register of release function, and let it release every piece of resource by put_device() instead of kfree(). While at it, fix another subtle issue, i.e. when gc->ngpio is equal to 0, we still call kcalloc() and, in case of further error, kfree() on the ZERO_PTR pointer, which is not NULL. It's not a bug per se, but rather waste of the resources and potentially wrong expectation about contents of the gdev->descs variable. Fixes: 159f3cd92f17 ("gpiolib: Defer gpio device setup until after gpiolib initialization") Signed-off-by: Zeng Heng Co-developed-by: Andy Shevchenko Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4756ea08894f..a70522aef355 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -526,12 +526,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) if (ret) return ret; + /* From this point, the .release() function cleans up gpio_device */ + gdev->dev.release = gpiodevice_release; + ret = gpiochip_sysfs_register(gdev); if (ret) goto err_remove_device; - /* From this point, the .release() function cleans up gpio_device */ - gdev->dev.release = gpiodevice_release; dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic"); @@ -597,10 +598,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct fwnode_handle *fwnode = NULL; struct gpio_device *gdev; unsigned long flags; - int base = gc->base; unsigned int i; + u32 ngpios = 0; + int base = 0; int ret = 0; - u32 ngpios; if (gc->fwnode) fwnode = gc->fwnode; @@ -647,17 +648,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, else gdev->owner = THIS_MODULE; - gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); - if (!gdev->descs) { - ret = -ENOMEM; - goto err_free_dev_name; - } - /* * Try the device properties if the driver didn't supply the number * of GPIO lines. */ - if (gc->ngpio == 0) { + ngpios = gc->ngpio; + if (ngpios == 0) { ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios); if (ret == -ENODATA) /* @@ -668,7 +664,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, */ ngpios = 0; else if (ret) - goto err_free_descs; + goto err_free_dev_name; gc->ngpio = ngpios; } @@ -676,13 +672,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (gc->ngpio == 0) { chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); ret = -EINVAL; - goto err_free_descs; + goto err_free_dev_name; } if (gc->ngpio > FASTPATH_NGPIO) chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", gc->ngpio, FASTPATH_NGPIO); + gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); + if (!gdev->descs) { + ret = -ENOMEM; + goto err_free_dev_name; + } + gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { ret = -ENOMEM; @@ -701,11 +703,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * it may be a pipe dream. It will not happen before we get rid * of the sysfs interface anyways. */ + base = gc->base; if (base < 0) { base = gpiochip_find_base(gc->ngpio); if (base < 0) { - ret = base; spin_unlock_irqrestore(&gpio_lock, flags); + ret = base; + base = 0; goto err_free_label; } /* @@ -816,6 +820,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, err_free_gpiochip_mask: gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); + if (gdev->dev.release) { + /* release() has been registered by gpiochip_setup_dev() */ + put_device(&gdev->dev); + goto err_print_message; + } err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); @@ -829,13 +838,14 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: + kfree(gdev); +err_print_message: /* failures here can mean systems won't boot... */ if (ret != -EPROBE_DEFER) { pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, - gdev->base, gdev->base + gdev->ngpio - 1, + base, base + (int)ngpios - 1, gc->label ? : "generic", ret); } - kfree(gdev); return ret; } EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); From 46fb6512538d201d9a5b2bd7138b6751c37fdf0b Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 24 Nov 2022 11:03:08 +0800 Subject: [PATCH 748/963] net: ethernet: ti: am65-cpsw: fix error handling in am65_cpsw_nuss_probe() The am65_cpsw_nuss_cleanup_ndev() function calls unregister_netdev() even if register_netdev() fails, which triggers WARN_ON(1) in unregister_netdevice_many(). To fix it, make sure that unregister_netdev() is called only on registered netdev. Compile tested only. Fixes: 84b4aa493249 ("net: ethernet: ti: am65-cpsw: add multi port support in mac-only mode") Signed-off-by: Zhang Changzhong Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c50b137f92d7..d04a239c658e 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2082,7 +2082,7 @@ static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) for (i = 0; i < common->port_num; i++) { port = &common->ports[i]; - if (port->ndev) + if (port->ndev && port->ndev->reg_state == NETREG_REGISTERED) unregister_netdev(port->ndev); } } From b8f79dccd38edf7db4911c353d9cd792ab13a327 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 24 Nov 2022 07:09:17 +0000 Subject: [PATCH 749/963] net: net_netdev: Fix error handling in ntb_netdev_init_module() The ntb_netdev_init_module() returns the ntb_transport_register_client() directly without checking its return value, if ntb_transport_register_client() failed, the NTB client device is not unregistered. Fix by unregister NTB client device when ntb_transport_register_client() failed. Fixes: 548c237c0a99 ("net: Add support for NTB virtual ethernet device") Signed-off-by: Yuan Can Signed-off-by: David S. Miller --- drivers/net/ntb_netdev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 464d88ca8ab0..a4abea921046 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -484,7 +484,14 @@ static int __init ntb_netdev_init_module(void) rc = ntb_transport_register_client_dev(KBUILD_MODNAME); if (rc) return rc; - return ntb_transport_register_client(&ntb_netdev_client); + + rc = ntb_transport_register_client(&ntb_netdev_client); + if (rc) { + ntb_transport_unregister_client_dev(KBUILD_MODNAME); + return rc; + } + + return 0; } module_init(ntb_netdev_init_module); From dcc14cfd7debe11b825cb077e75d91d2575b4cb8 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 24 Nov 2022 16:10:05 +0800 Subject: [PATCH 750/963] net/9p: Fix a potential socket leak in p9_socket_open Both p9_fd_create_tcp() and p9_fd_create_unix() will call p9_socket_open(). If the creation of p9_trans_fd fails, p9_fd_create_tcp() and p9_fd_create_unix() will return an error directly instead of releasing the cscoket, which will result in a socket leak. This patch adds sock_release() to fix the leak issue. Fixes: 6b18662e239a ("9p connect fixes") Signed-off-by: Wang Hai ACKed-by: Al Viro Signed-off-by: David S. Miller --- net/9p/trans_fd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 56a186768750..f834726d21ea 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -860,8 +860,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket) struct file *file; p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); - if (!p) + if (!p) { + sock_release(csocket); return -ENOMEM; + } csocket->sk->sk_allocation = GFP_NOIO; file = sock_alloc_file(csocket, 0, NULL); From 9256db4e45e8b497b0e993cc3ed4ad08eb2389b6 Mon Sep 17 00:00:00 2001 From: Yuri Karpov Date: Thu, 24 Nov 2022 11:43:03 +0300 Subject: [PATCH 751/963] net: ethernet: nixge: fix NULL dereference In function nixge_hw_dma_bd_release() dereference of NULL pointer priv->rx_bd_v is possible for the case of its allocation failure in nixge_hw_dma_bd_init(). Move for() loop with priv->rx_bd_v dereference under the check for its validity. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Signed-off-by: Yuri Karpov Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/ni/nixge.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 19d043b593cc..62320be4de5a 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -249,25 +249,26 @@ static void nixge_hw_dma_bd_release(struct net_device *ndev) struct sk_buff *skb; int i; - for (i = 0; i < RX_BD_NUM; i++) { - phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - phys); + if (priv->rx_bd_v) { + for (i = 0; i < RX_BD_NUM; i++) { + phys_addr = nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + phys); - dma_unmap_single(ndev->dev.parent, phys_addr, - NIXGE_MAX_JUMBO_FRAME_SIZE, - DMA_FROM_DEVICE); + dma_unmap_single(ndev->dev.parent, phys_addr, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); - skb = (struct sk_buff *)(uintptr_t) - nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], - sw_id_offset); - dev_kfree_skb(skb); - } + skb = (struct sk_buff *)(uintptr_t) + nixge_hw_dma_bd_get_addr(&priv->rx_bd_v[i], + sw_id_offset); + dev_kfree_skb(skb); + } - if (priv->rx_bd_v) dma_free_coherent(ndev->dev.parent, sizeof(*priv->rx_bd_v) * RX_BD_NUM, priv->rx_bd_v, priv->rx_bd_p); + } if (priv->tx_skb) devm_kfree(ndev->dev.parent, priv->tx_skb); From 7642cc28fd37a15feacff0ccf0e5a8466630df5d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 24 Nov 2022 09:06:48 +0000 Subject: [PATCH 752/963] net: phylink: fix PHY validation with rate adaption Tim Harvey reports that link modes which he does not expect to be supported are being advertised, and this is because of the workaround we have for PHYs that switch interface modes. Fix this up by checking whether rate matching will be used for the requested interface mode, and if rate matching will be used, perform validation only with the requested interface mode, rather than invoking this workaround. Signed-off-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6547b6cc6cbe..2805b04d6402 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1603,19 +1603,29 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, linkmode_copy(supported, phy->supported); linkmode_copy(config.advertising, phy->advertising); - /* Clause 45 PHYs switch their Serdes lane between several different - * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G - * speeds. We really need to know which interface modes the PHY and - * MAC supports to properly work out which linkmodes can be supported. + /* Check whether we would use rate matching for the proposed interface + * mode. */ - if (phy->is_c45 && + config.rate_matching = phy_get_rate_matching(phy, interface); + + /* Clause 45 PHYs may switch their Serdes lane between, e.g. 10GBASE-R, + * 5GBASE-R, 2500BASE-X and SGMII if they are not using rate matching. + * For some interface modes (e.g. RXAUI, XAUI and USXGMII) switching + * their Serdes is either unnecessary or not reasonable. + * + * For these which switch interface modes, we really need to know which + * interface modes the PHY supports to properly work out which ethtool + * linkmodes can be supported. For now, as a work-around, we validate + * against all interface modes, which may lead to more ethtool link + * modes being advertised than are actually supported. + */ + if (phy->is_c45 && config.rate_matching == RATE_MATCH_NONE && interface != PHY_INTERFACE_MODE_RXAUI && interface != PHY_INTERFACE_MODE_XAUI && interface != PHY_INTERFACE_MODE_USXGMII) config.interface = PHY_INTERFACE_MODE_NA; else config.interface = interface; - config.rate_matching = phy_get_rate_matching(phy, config.interface); ret = phylink_validate(pl, supported, &config); if (ret) { From 985a02e75881b73a43c9433a718b49d272a9dd6b Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 24 Nov 2022 16:07:46 +0530 Subject: [PATCH 753/963] net: wwan: iosm: fix kernel test robot reported error sparse warnings - iosm_ipc_mux_codec.c:1474 using plain integer as NULL pointer. Use skb_trim() to reset skb tail & len. Fixes: 9413491e20e1 ("net: iosm: encode or decode datagram") Reported-by: kernel test robot Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index d41e373f9c0a..c16365123660 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -1471,8 +1471,7 @@ void ipc_mux_ul_encoded_process(struct iosm_mux *ipc_mux, struct sk_buff *skb) ipc_mux->ul_data_pend_bytes); /* Reset the skb settings. */ - skb->tail = 0; - skb->len = 0; + skb_trim(skb, 0); /* Add the consumed ADB to the free list. */ skb_queue_tail((&ipc_mux->ul_adb.free_list), skb); From 4a99e3c8ed888577b947cbed97d88c9706896105 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 24 Nov 2022 16:08:03 +0530 Subject: [PATCH 754/963] net: wwan: iosm: fix dma_alloc_coherent incompatible pointer type Fix build error reported on armhf while preparing 6.1-rc5 for Debian. iosm_ipc_protocol.c:244:36: error: passing argument 3 of 'dma_alloc_coherent' from incompatible pointer type. Change phy_ap_shm type from phys_addr_t to dma_addr_t. Fixes: faed4c6f6f48 ("net: iosm: shared memory protocol") Reported-by: Bonaccorso Salvatore Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_protocol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol.h b/drivers/net/wwan/iosm/iosm_ipc_protocol.h index 9b3a6d86ece7..289397c4ea6c 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol.h +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol.h @@ -122,7 +122,7 @@ struct iosm_protocol { struct iosm_imem *imem; struct ipc_rsp *rsp_ring[IPC_MEM_MSG_ENTRIES]; struct device *dev; - phys_addr_t phy_ap_shm; + dma_addr_t phy_ap_shm; u32 old_msg_tail; }; From 2290a1d46bf30f9e0bcf49ad20d5c30d0e099989 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 24 Nov 2022 16:08:17 +0530 Subject: [PATCH 755/963] net: wwan: iosm: fix crash in peek throughput test Peek throughput UL test is resulting in crash. If the UL transfer block free list is exhaust, the peeked skb is freed. In the next transfer freed skb is referred from UL list which results in crash. Don't free the skb if UL transfer blocks are unavailable. The pending skb will be picked for transfer on UL transfer block available. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index c16365123660..738420bd14af 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -1207,10 +1207,9 @@ static int mux_ul_dg_update_tbl_index(struct iosm_mux *ipc_mux, qlth_n_ql_size, ul_list); ipc_mux_ul_adb_finish(ipc_mux); if (ipc_mux_ul_adb_allocate(ipc_mux, adb, &ipc_mux->size_needed, - IOSM_AGGR_MUX_SIG_ADBH)) { - dev_kfree_skb(src_skb); + IOSM_AGGR_MUX_SIG_ADBH)) return -ENOMEM; - } + ipc_mux->size_needed = le32_to_cpu(adb->adbh->block_length); ipc_mux->size_needed += offsetof(struct mux_adth, dg); From c34ca4f32c24bf748493b49085e43cd714cf8357 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Thu, 24 Nov 2022 16:08:32 +0530 Subject: [PATCH 756/963] net: wwan: iosm: fix incorrect skb length skb passed to network layer contains incorrect length. In mux aggregation protocol, the datagram block received from device contains block signature, packet & datagram header. The right skb len to be calculated by subracting datagram pad len from datagram length. Whereas in mux lite protocol, the skb contains single datagram so skb len is calculated by subtracting the packet offset from datagram header. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_mux_codec.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 738420bd14af..d6b166fc5c0e 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -365,7 +365,8 @@ static void ipc_mux_dl_cmd_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) /* Pass the DL packet to the netif layer. */ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, struct iosm_wwan *wwan, u32 offset, - u8 service_class, struct sk_buff *skb) + u8 service_class, struct sk_buff *skb, + u32 pkt_len) { struct sk_buff *dest_skb = skb_clone(skb, GFP_ATOMIC); @@ -373,7 +374,7 @@ static int ipc_mux_net_receive(struct iosm_mux *ipc_mux, int if_id, return -ENOMEM; skb_pull(dest_skb, offset); - skb_set_tail_pointer(dest_skb, dest_skb->len); + skb_trim(dest_skb, pkt_len); /* Pass the packet to the netif layer. */ dest_skb->priority = service_class; @@ -429,7 +430,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, struct sk_buff *skb) { - u32 pad_len, packet_offset; + u32 pad_len, packet_offset, adgh_len; struct iosm_wwan *wwan; struct mux_adgh *adgh; u8 *block = skb->data; @@ -470,10 +471,12 @@ static void ipc_mux_dl_adgh_decode(struct iosm_mux *ipc_mux, packet_offset = sizeof(*adgh) + pad_len; if_id += ipc_mux->wwan_q_offset; + adgh_len = le16_to_cpu(adgh->length); /* Pass the packet to the netif layer */ rc = ipc_mux_net_receive(ipc_mux, if_id, wwan, packet_offset, - adgh->service_class, skb); + adgh->service_class, skb, + adgh_len - packet_offset); if (rc) { dev_err(ipc_mux->dev, "mux adgh decoding error"); return; @@ -547,7 +550,7 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, int if_id, int nr_of_dg) { u32 dl_head_pad_len = ipc_mux->session[if_id].dl_head_pad_len; - u32 packet_offset, i, rc; + u32 packet_offset, i, rc, dg_len; for (i = 0; i < nr_of_dg; i++, dg++) { if (le32_to_cpu(dg->datagram_index) @@ -562,11 +565,12 @@ static int mux_dl_process_dg(struct iosm_mux *ipc_mux, struct mux_adbh *adbh, packet_offset = le32_to_cpu(dg->datagram_index) + dl_head_pad_len; + dg_len = le16_to_cpu(dg->datagram_length); /* Pass the packet to the netif layer. */ rc = ipc_mux_net_receive(ipc_mux, if_id, ipc_mux->wwan, packet_offset, - dg->service_class, - skb); + dg->service_class, skb, + dg_len - dl_head_pad_len); if (rc) goto dg_error; } From 97d4d394b58777f7056ebba8ffdb4002d0563259 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 24 Nov 2022 13:04:37 +0100 Subject: [PATCH 757/963] netfilter: nft_set_pipapo: Actually validate intervals in fields after the first one Embarrassingly, nft_pipapo_insert() checked for interval validity in the first field only. The start_p and end_p pointers were reset to key data from the first field at every iteration of the loop which was supposed to go over the set fields. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: Pablo Neira Ayuso Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4f9299b9dcdd..06d46d182634 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); struct nft_pipapo_field *f; + const u8 *start_p, *end_p; int i, bsize_max, err = 0; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) @@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Validate */ + start_p = start; + end_p = end; nft_pipapo_for_each_field(f, i, m) { - const u8 *start_p = start, *end_p = end; - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) return -ENOSPC; From a81047154e7ce4eb8769d5d21adcbc9693542a79 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 24 Nov 2022 12:54:10 -0500 Subject: [PATCH 758/963] netfilter: flowtable_offload: fix using __this_cpu_add in preemptible flow_offload_queue_work() can be called in workqueue without bh disabled, like the call trace showed in my act_ct testing, calling NF_FLOW_TABLE_STAT_INC() there would cause a call trace: BUG: using __this_cpu_add() in preemptible [00000000] code: kworker/u4:0/138560 caller is flow_offload_queue_work+0xec/0x1b0 [nf_flow_table] Workqueue: act_ct_workqueue tcf_ct_flow_table_cleanup_work [act_ct] Call Trace: dump_stack_lvl+0x33/0x46 check_preemption_disabled+0xc3/0xf0 flow_offload_queue_work+0xec/0x1b0 [nf_flow_table] nf_flow_table_iterate+0x138/0x170 [nf_flow_table] nf_flow_table_free+0x140/0x1a0 [nf_flow_table] tcf_ct_flow_table_cleanup_work+0x2f/0x2b0 [act_ct] process_one_work+0x6a3/0x1030 worker_thread+0x8a/0xdf0 This patch fixes it by using NF_FLOW_TABLE_STAT_INC_ATOMIC() instead in flow_offload_queue_work(). Note that for FLOW_CLS_REPLACE branch in flow_offload_queue_work(), it may not be called in preemptible path, but it's good to use NF_FLOW_TABLE_STAT_INC_ATOMIC() for all cases in flow_offload_queue_work(). Fixes: b038177636f8 ("netfilter: nf_flow_table: count pending offload workqueue tasks") Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 00b522890d77..0fdcdb2c9ae4 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -997,13 +997,13 @@ static void flow_offload_queue_work(struct flow_offload_work *offload) struct net *net = read_pnet(&offload->flowtable->net); if (offload->cmd == FLOW_CLS_REPLACE) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_add); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add); queue_work(nf_flow_offload_add_wq, &offload->work); } else if (offload->cmd == FLOW_CLS_DESTROY) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_del); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del); queue_work(nf_flow_offload_del_wq, &offload->work); } else { - NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats); queue_work(nf_flow_offload_stats_wq, &offload->work); } } From ed14d225cc7c842f6d4d5a3009f71a44f5852d09 Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Wed, 9 Nov 2022 23:37:34 +0800 Subject: [PATCH 759/963] drm/vmwgfx: Fix race issue calling pin_user_pages pin_user_pages() is unsafe without protection of mmap_lock, fix it by calling pin_user_pages_fast(). Fixes: 7a7a933edd6c ("drm/vmwgfx: Introduce VMware mks-guest-stats") Signed-off-by: Dawei Li Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/TYWP286MB23193621CB443E1E1959A00BCA3E9@TYWP286MB2319.JPNP286.PROD.OUTLOOK.COM --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 089046fa21be..50fa3df0bc0c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -1085,21 +1085,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data, reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs)); /* Pin mksGuestStat user pages and store those in the instance descriptor */ - nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL); + nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat); if (num_pages_stat != nr_pinned_stat) goto err_pin_stat; for (i = 0; i < num_pages_stat; ++i) pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]); - nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL); + nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info); if (num_pages_info != nr_pinned_info) goto err_pin_info; for (i = 0; i < num_pages_info; ++i) pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]); - nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL); + nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs); if (num_pages_strs != nr_pinned_strs) goto err_pin_strs; From 6989ea4881c8944fbf04378418bb1af63d875ef8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Nov 2022 00:29:26 +0200 Subject: [PATCH 760/963] pinctrl: intel: Save and restore pins in "direct IRQ" mode The firmware on some systems may configure GPIO pins to be an interrupt source in so called "direct IRQ" mode. In such cases the GPIO controller driver has no idea if those pins are being used or not. At the same time, there is a known bug in the firmwares that don't restore the pin settings correctly after suspend, i.e. by an unknown reason the Rx value becomes inverted. Hence, let's save and restore the pins that are configured as GPIOs in the input mode with GPIROUTIOXAPIC bit set. Cc: stable@vger.kernel.org Reported-and-tested-by: Dale Smith Reported-and-tested-by: John Harris BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214749 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Link: https://lore.kernel.org/r/20221124222926.72326-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in the firmwares that don't restore the pin settings correctly + * after suspend, i.e. by an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. + */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } From 1d6b5ed41f8c5c7012dbebe9bc0e2292a5a232b4 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 00:15:56 -0600 Subject: [PATCH 761/963] riscv: Fix NR_CPUS range conditions The conditions reference the symbol SBI_V01, which does not exist. The correct symbol is RISCV_SBI_V01. Fixes: e623715f3d67 ("RISC-V: Increase range and default value of NR_CPUS") Signed-off-by: Samuel Holland Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221126061557.3541-1-samuel@sholland.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT From 3f105a742725a1b78766a55169f1d827732e62b8 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 21 Nov 2022 14:33:03 +0100 Subject: [PATCH 762/963] riscv: Sync efi page table's kernel mappings before switching The EFI page table is initially created as a copy of the kernel page table. With VMAP_STACK enabled, kernel stacks are allocated in the vmalloc area: if the stack is allocated in a new PGD (one that was not present at the moment of the efi page table creation or not synced in a previous vmalloc fault), the kernel will take a trap when switching to the efi page table when the vmalloc kernel stack is accessed, resulting in a kernel panic. Fix that by updating the efi kernel mappings before switching to the efi page table. Signed-off-by: Alexandre Ghiti Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services") Tested-by: Emil Renner Berthing Reviewed-by: Atish Patra Link: https://lore.kernel.org/r/20221121133303.1782246-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/efi.h | 6 +++++- arch/riscv/include/asm/pgalloc.h | 11 ++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include #include #include +#include #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } From d5082d386eee7e8ec46fa8581932c81a4961dcef Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 24 Nov 2022 23:09:32 +0200 Subject: [PATCH 763/963] ipv4: Fix route deletion when nexthop info is not specified When the kernel receives a route deletion request from user space it tries to delete a route that matches the route attributes specified in the request. If only prefix information is specified in the request, the kernel should delete the first matching FIB alias regardless of its associated FIB info. However, an error is currently returned when the FIB info is backed by a nexthop object: # ip nexthop add id 1 via 192.0.2.2 dev dummy10 # ip route add 198.51.100.0/24 nhid 1 # ip route del 198.51.100.0/24 RTNETLINK answers: No such process Fix by matching on such a FIB info when legacy nexthop attributes are not specified in the request. An earlier check already covers the case where a nexthop ID is specified in the request. Add tests that cover these flows. Before the fix: # ./fib_nexthops.sh -t ipv4_fcnal ... TEST: Delete route when not specifying nexthop attributes [FAIL] Tests passed: 11 Tests failed: 1 After the fix: # ./fib_nexthops.sh -t ipv4_fcnal ... TEST: Delete route when not specifying nexthop attributes [ OK ] Tests passed: 12 Tests failed: 0 No regressions in other tests: # ./fib_nexthops.sh ... Tests passed: 228 Tests failed: 0 # ./fib_tests.sh ... Tests passed: 186 Tests failed: 0 Cc: stable@vger.kernel.org Reported-by: Jonas Gorski Tested-by: Jonas Gorski Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") Fixes: 61b91eb33a69 ("ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20221124210932.2470010-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_semantics.c | 8 +++++--- tools/testing/selftests/net/fib_nexthops.sh | 11 +++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f721c308248b..19a662003eef 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,9 +888,11 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; + if (fi->nh) { + if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) + return 1; + return 0; + } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ee5e98204d3d..a47b26ab48f2 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -1228,6 +1228,17 @@ ipv4_fcnal() run_cmd "$IP ro add 172.16.101.0/24 nhid 21" run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1" log_test $? 2 "Delete multipath route with only nh id based entry" + + run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1" + run_cmd "$IP ro add 172.16.102.0/24 nhid 22" + run_cmd "$IP ro del 172.16.102.0/24 dev veth1" + log_test $? 2 "Delete route when specifying only nexthop device" + + run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6" + log_test $? 2 "Delete route when specifying only gateway" + + run_cmd "$IP ro del 172.16.102.0/24" + log_test $? 0 "Delete route when not specifying nexthop attributes" } ipv4_grp_fcnal() From 39f59bca275d2d819a8788c0f962e9e89843efc9 Mon Sep 17 00:00:00 2001 From: Jerry Ray Date: Mon, 28 Nov 2022 13:35:59 -0600 Subject: [PATCH 764/963] dsa: lan9303: Correct stat name This patch changes the reported ethtool statistics for the lan9303 family of parts covered by this driver. The TxUnderRun statistic label is renamed to RxShort to accurately reflect what stat the device is reporting. I did not reorder the statistics as that might cause problems with existing user code that are expecting the stats at a certain offset. Fixes: a1292595e006 ("net: dsa: add new DSA switch driver for the SMSC-LAN9303") Signed-off-by: Jerry Ray Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221128193559.6572-1-jerry.ray@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/lan9303-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 438e46af03e9..80f07bd20593 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -961,7 +961,7 @@ static const struct lan9303_mib_desc lan9303_mib[] = { { .offset = LAN9303_MAC_TX_BRDCST_CNT_0, .name = "TxBroad", }, { .offset = LAN9303_MAC_TX_PAUSE_CNT_0, .name = "TxPause", }, { .offset = LAN9303_MAC_TX_MULCST_CNT_0, .name = "TxMulti", }, - { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "TxUnderRun", }, + { .offset = LAN9303_MAC_RX_UNDSZE_CNT_0, .name = "RxShort", }, { .offset = LAN9303_MAC_TX_64_CNT_0, .name = "Tx64Byte", }, { .offset = LAN9303_MAC_TX_127_CNT_0, .name = "Tx128Byte", }, { .offset = LAN9303_MAC_TX_255_CNT_0, .name = "Tx256Byte", }, From fe94800184f22d4778628f1321dce5acb7513d84 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 28 Nov 2022 16:42:37 +0100 Subject: [PATCH 765/963] mptcp: don't orphan ssk in mptcp_close() All of the subflows of a msk will be orphaned in mptcp_close(), which means the subflows are in DEAD state. After then, DATA_FIN will be sent, and the other side will response with a DATA_ACK for this DATA_FIN. However, if the other side still has pending data, the data that received on these subflows will not be passed to the msk, as they are DEAD and subflow_data_ready() will not be called in tcp_data_ready(). Therefore, these data can't be acked, and they will be retransmitted again and again, until timeout. Fix this by setting ssk->sk_socket and ssk->sk_wq to 'NULL', instead of orphaning the subflows in __mptcp_close(), as Paolo suggested. Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Reviewed-by: Biao Jiang Reviewed-by: Mengen Sun Signed-off-by: Menglong Dong Reviewed-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b6dc6e260334..1dbc62537259 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2354,12 +2354,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, goto out; } - /* if we are invoked by the msk cleanup code, the subflow is - * already orphaned - */ - if (ssk->sk_socket) - sock_orphan(ssk); - + sock_orphan(ssk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2940,7 +2935,11 @@ bool __mptcp_close(struct sock *sk, long timeout) if (ssk == msk->first) subflow->fail_tout = 0; - sock_orphan(ssk); + /* detach from the parent socket, but allow data_ready to + * push incoming data into the mptcp stack, to properly ack it + */ + ssk->sk_socket = NULL; + ssk->sk_wq = NULL; unlock_sock_fast(ssk, slow); } sock_orphan(sk); From b4f166651d03b5484fa179817ba8ad4899a5a6ac Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 28 Nov 2022 16:42:38 +0100 Subject: [PATCH 766/963] mptcp: fix sleep in atomic at close time Matt reported a splat at msk close time: BUG: sleeping function called from invalid context at net/mptcp/protocol.c:2877 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 155, name: packetdrill preempt_count: 201, expected: 0 RCU nest depth: 0, expected: 0 4 locks held by packetdrill/155: #0: ffff888001536990 (&sb->s_type->i_mutex_key#6){+.+.}-{3:3}, at: __sock_release (net/socket.c:650) #1: ffff88800b498130 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close (net/mptcp/protocol.c:2973) #2: ffff88800b49a130 (sk_lock-AF_INET/1){+.+.}-{0:0}, at: __mptcp_close_ssk (net/mptcp/protocol.c:2363) #3: ffff88800b49a0b0 (slock-AF_INET){+...}-{2:2}, at: __lock_sock_fast (include/net/sock.h:1820) Preemption disabled at: 0x0 CPU: 1 PID: 155 Comm: packetdrill Not tainted 6.1.0-rc5 #365 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) __might_resched.cold (kernel/sched/core.c:9891) __mptcp_destroy_sock (include/linux/kernel.h:110) __mptcp_close (net/mptcp/protocol.c:2959) mptcp_subflow_queue_clean (include/net/sock.h:1777) __mptcp_close_ssk (net/mptcp/protocol.c:2363) mptcp_destroy_common (net/mptcp/protocol.c:3170) mptcp_destroy (include/net/sock.h:1495) __mptcp_destroy_sock (net/mptcp/protocol.c:2886) __mptcp_close (net/mptcp/protocol.c:2959) mptcp_close (net/mptcp/protocol.c:2974) inet_release (net/ipv4/af_inet.c:432) __sock_release (net/socket.c:651) sock_close (net/socket.c:1367) __fput (fs/file_table.c:320) task_work_run (kernel/task_work.c:181 (discriminator 1)) exit_to_user_mode_prepare (include/linux/resume_user_mode.h:49) syscall_exit_to_user_mode (kernel/entry/common.c:130) do_syscall_64 (arch/x86/entry/common.c:87) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) We can't call mptcp_close under the 'fast' socket lock variant, replace it with a sock_lock_nested() as the relevant code is already under the listening msk socket lock protection. Reported-by: Matthieu Baerts Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/316 Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 02a54d59697b..2159b5f9988f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1745,16 +1745,16 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow, do_cancel_work; + bool do_cancel_work; sock_hold(sk); - slow = lock_sock_fast_nested(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; do_cancel_work = __mptcp_close(sk, 0); - unlock_sock_fast(sk, slow); + release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); sock_put(sk); From 3067bc61fcfe3081bf4807ce65560f499e895e77 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 25 Nov 2022 12:46:43 -0500 Subject: [PATCH 767/963] tipc: re-fetch skb cb after tipc_msg_validate As the call trace shows, the original skb was freed in tipc_msg_validate(), and dereferencing the old skb cb would cause an use-after-free crash. BUG: KASAN: use-after-free in tipc_crypto_rcv_complete+0x1835/0x2240 [tipc] Call Trace: tipc_crypto_rcv_complete+0x1835/0x2240 [tipc] tipc_crypto_rcv+0xd32/0x1ec0 [tipc] tipc_rcv+0x744/0x1150 [tipc] ... Allocated by task 47078: kmem_cache_alloc_node+0x158/0x4d0 __alloc_skb+0x1c1/0x270 tipc_buf_acquire+0x1e/0xe0 [tipc] tipc_msg_create+0x33/0x1c0 [tipc] tipc_link_build_proto_msg+0x38a/0x2100 [tipc] tipc_link_timeout+0x8b8/0xef0 [tipc] tipc_node_timeout+0x2a1/0x960 [tipc] call_timer_fn+0x2d/0x1c0 ... Freed by task 47078: tipc_msg_validate+0x7b/0x440 [tipc] tipc_crypto_rcv_complete+0x4b5/0x2240 [tipc] tipc_crypto_rcv+0xd32/0x1ec0 [tipc] tipc_rcv+0x744/0x1150 [tipc] This patch fixes it by re-fetching the skb cb from the new allocated skb after calling tipc_msg_validate(). Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication") Reported-by: Shuang Li Signed-off-by: Xin Long Link: https://lore.kernel.org/r/1b1cdba762915325bd8ef9a98d0276eb673df2a5.1669398403.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/tipc/crypto.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f09316a9035f..d67440de011e 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -1971,6 +1971,9 @@ static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); + /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ + skb_cb = TIPC_SKB_CB(*skb); + /* Mark skb decrypted */ skb_cb->decrypted = 1; From 7e177d32442b7ed08a9fa61b61724abc548cb248 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 25 Nov 2022 15:57:24 +0800 Subject: [PATCH 768/963] net: hsr: Fix potential use-after-free The skb is delivered to netif_rx() which may free it, after calling this, dereferencing skb may trigger use-after-free. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20221125075724.27912-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_forward.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index a50429a62f74..56bb27d67a2e 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -351,17 +351,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev, struct hsr_node *node_src) { bool was_multicast_frame; - int res; + int res, recv_len; was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST); hsr_addr_subst_source(node_src, skb); skb_pull(skb, ETH_HLEN); + recv_len = skb->len; res = netif_rx(skb); if (res == NET_RX_DROP) { dev->stats.rx_dropped++; } else { dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += recv_len; if (was_multicast_frame) dev->stats.multicast++; } From cdde1560118f82498fc9e9a7c1ef7f0ef7755891 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Nov 2022 23:01:30 +0800 Subject: [PATCH 769/963] net: mdiobus: fix unbalanced node reference count I got the following report while doing device(mscc-miim) load test with CONFIG_OF_UNITTEST and CONFIG_OF_DYNAMIC enabled: OF: ERROR: memory leak, expected refcount 1 instead of 2, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /spi/soc@0/mdio@7107009c/ethernet-phy@0 If the 'fwnode' is not an acpi node, the refcount is get in fwnode_mdiobus_phy_device_register(), but it has never been put when the device is freed in the normal path. So call fwnode_handle_put() in phy_device_release() to avoid leak. If it's an acpi node, it has never been get, but it's put in the error path, so call fwnode_handle_get() before phy_device_register() to keep get/put operation balanced. Fixes: bc1bee3b87ee ("net: mdiobus: Introduce fwnode_mdiobus_register_phy()") Signed-off-by: Yang Yingliang Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221124150130.609420-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/fwnode_mdio.c | 2 +- drivers/net/phy/phy_device.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index 689e728345ce..eb344f6d4a7b 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -148,7 +148,7 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* Associate the fwnode with the device structure so it * can be looked up later. */ - phy->mdio.dev.fwnode = child; + phy->mdio.dev.fwnode = fwnode_handle_get(child); /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a72adabfd2a7..8cff61dbc4b5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -217,6 +217,7 @@ static void phy_mdio_device_free(struct mdio_device *mdiodev) static void phy_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_phy_device(dev)); } From ca57f02295f188d6c65ec02202402979880fa6d8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 28 Nov 2022 22:02:56 +0000 Subject: [PATCH 770/963] afs: Fix fileserver probe RTT handling The fileserver probing code attempts to work out the best fileserver to use for a volume by retrieving the RTT calculated by AF_RXRPC for the probe call sent to each server and comparing them. Sometimes, however, no RTT estimate is available and rxrpc_kernel_get_srtt() returns false, leading good fileservers to be given an RTT of UINT_MAX and thus causing the rotation algorithm to ignore them. Fix afs_select_fileserver() to ignore rxrpc_kernel_get_srtt()'s return value and just take the estimated RTT it provides - which will be capped at 1 second. Fixes: 1d4adfaf6574 ("rxrpc: Make rxrpc_kernel_get_srtt() indicate validity") Signed-off-by: David Howells Reviewed-by: Marc Dionne Tested-by: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/166965503999.3392585.13954054113218099395.stgit@warthog.procyon.org.uk/ Signed-off-by: Linus Torvalds --- fs/afs/fs_probe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index c0031a3ab42f..3ac5fcf98d0d 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -167,8 +167,8 @@ void afs_fileserver_probe_result(struct afs_call *call) clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags); } - if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) && - rtt_us < server->probe.rtt) { + rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us); + if (rtt_us < server->probe.rtt) { server->probe.rtt = rtt_us; server->rtt = rtt_us; alist->preferred = index; From f5f041c153d7af1d980a4a03dcf8f2b2f1ce1e4e Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Tue, 22 Nov 2022 22:18:04 -0800 Subject: [PATCH 771/963] ANDROID: Fix license for BUILD.bazel file. It should be GPLv2. Test: none Bug: 259137060 Change-Id: I6381a9e5a543c24d395984c211bfde5993ea4840 Signed-off-by: Yifan Hong --- BUILD.bazel | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 5c9b101a7224..2a3ff0dd24c0 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1,16 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2021 The Android Open Source Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. + load("//build/bazel_common_rules/dist:dist.bzl", "copy_to_dist_dir") load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels", "define_db845c") load("//build/kernel/kleaf:kernel.bzl", "ddk_headers", "kernel_build", "kernel_images", "kernel_modules_install") From 2f3830544a89af2e72e7fd3d6ca44dd9cffec197 Mon Sep 17 00:00:00 2001 From: Radhakrishna Sripada Date: Thu, 17 Nov 2022 13:30:14 -0800 Subject: [PATCH 772/963] drm/i915/mtl: Fix dram info readout MEM_SS_INFO_GLOBAL Register info read from the hardware is cached in val. However the variable is being modified when determining the DRAM type thereby clearing out the channels and qgv info extracted later in the function xelpdp_get_dram_info. Preserve the register value and use extracted fields in the switch statement. Fixes: 825477e77912 ("drm/i915/mtl: Obtain SAGV values from MMIO instead of GT pcode mailbox") Cc: Matt Roper Signed-off-by: Radhakrishna Sripada Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20221117213015.584417-1-radhakrishna.sripada@intel.com (cherry picked from commit ec35c41d91052a3a15dd3767075620af448b8030) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_dram.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; From 3c1ea6a5f4f55d4e376675dda16945eb5d9bb4de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 18 Nov 2022 20:52:01 +0200 Subject: [PATCH 773/963] drm/i915: Remove non-existent pipes from bigjoiner pipe mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bigjoiner_pipes() doesn't consider that: - RKL only has three pipes - some pipes may be fused off This means that intel_atomic_check_bigjoiner() won't reject all configurations that would need a non-existent pipe. Instead we just keep on rolling witout actually having reserved the slave pipe we need. It's possible that we don't outright explode anywhere due to this since eg. for_each_intel_crtc_in_pipe_mask() will only walk the crtcs we've registered even though the passed in pipe_mask asks for more of them. But clearly the thing won't do what is expected of it when the required pipes are not present. Fix the problem by consulting the device info pipe_mask already in bigjoiner_pipes(). Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221118185201.10469-1-ville.syrjala@linux.intel.com Reviewed-by: Arun R Murthy (cherry picked from commit f1c87a94a1087a26f41007ee83264033007421b5) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ static bool ilk_get_pipe_config(struct intel_crtc *crtc, static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, From a8899b8728013c7b2456f0bfa20e5fea85ee0fd1 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:54 +0100 Subject: [PATCH 774/963] drm/i915: Fix negative value passed as remaining time Commit b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") extended the API of intel_gt_retire_requests_timeout() with an extra argument 'remaining_timeout', intended for passing back unconsumed portion of requested timeout when 0 (success) is returned. However, when request retirement happens to succeed despite an error returned by a call to dma_fence_wait_timeout(), that error code (a negative value) is passed back instead of remaining time. If we then pass that negative value forward as requested timeout to intel_uc_wait_for_idle(), an explicit BUG will be triggered. If request retirement succeeds but an error code is passed back via remaininig_timeout, we may have no clue on how much of the initial timeout might have been left for spending it on waiting for GuC to become idle. OTOH, since all pending requests have been successfully retired, that error code has been already ignored by intel_gt_retire_requests_timeout(), then we shouldn't fail. Assume no more time has been left on error and pass 0 timeout value to intel_uc_wait_for_idle() to give it a chance to return success if GuC is already idle. v3: Don't fail on any error passed back via remaining_timeout. v2: Fix the issue on the caller side, not the provider. Fixes: b97060a99b01 ("drm/i915/guc: Update intel_gt_wait_for_idle to work with GuC") Signed-off-by: Janusz Krzysztofik Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Andrzej Hajda Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-2-janusz.krzysztofik@linux.intel.com (cherry picked from commit f235dbd5b768e238d365fd05d92de5a32abc1c1f) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_gt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..9f0388b3dcf2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) From 12b8b046e4c9de40fa59b6f067d6826f4e688f68 Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Mon, 21 Nov 2022 15:56:55 +0100 Subject: [PATCH 775/963] drm/i915: Never return 0 if not all requests retired Users of intel_gt_retire_requests_timeout() expect 0 return value on success. However, we have no protection from passing back 0 potentially returned by a call to dma_fence_wait_timeout() when it succedes right after its timeout has expired. Replace 0 with -ETIME before potentially using the timeout value as return code, so -ETIME is returned if there are still some requests not retired after timeout, 0 otherwise. v3: Use conditional expression, more compact but also better reflecting intention standing behind the change. v2: Move the added lines down so flush_submission() is not affected. Fixes: f33a8a51602c ("drm/i915: Merge wait_for_timelines with retire_request") Signed-off-by: Janusz Krzysztofik Reviewed-by: Andrzej Hajda Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20221121145655.75141-3-janusz.krzysztofik@linux.intel.com (cherry picked from commit f301a29f143760ce8d3d6b6a8436d45d3448cde6) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) From 5daadc86f27ea4d691e2131c04310d0418c6cd12 Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Fri, 25 Nov 2022 02:51:34 +0900 Subject: [PATCH 776/963] net: tun: Fix use-after-free in tun_detach() syzbot reported use-after-free in tun_detach() [1]. This causes call trace like below: ================================================================== BUG: KASAN: use-after-free in notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 Read of size 8 at addr ffff88807324e2a8 by task syz-executor.0/3673 CPU: 0 PID: 3673 Comm: syz-executor.0 Not tainted 6.1.0-rc5-syzkaller-00044-gcc675d22e422 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x461 mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 notifier_call_chain+0x1ee/0x200 kernel/notifier.c:75 call_netdevice_notifiers_info+0x86/0x130 net/core/dev.c:1942 call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] call_netdevice_notifiers net/core/dev.c:1997 [inline] netdev_wait_allrefs_any net/core/dev.c:10237 [inline] netdev_run_todo+0xbc6/0x1100 net/core/dev.c:10351 tun_detach drivers/net/tun.c:704 [inline] tun_chr_close+0xe4/0x190 drivers/net/tun.c:3467 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xb3d/0x2a30 kernel/exit.c:820 do_group_exit+0xd4/0x2a0 kernel/exit.c:950 get_signal+0x21b1/0x2440 kernel/signal.c:2858 arch_do_signal_or_restart+0x86/0x2300 arch/x86/kernel/signal.c:869 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd The cause of the issue is that sock_put() from __tun_detach() drops last reference count for struct net, and then notifier_call_chain() from netdev_state_change() accesses that struct net. This patch fixes the issue by calling sock_put() from tun_detach() after all necessary accesses for the struct net has done. Fixes: 83c1f36f9880 ("tun: send netlink notification when the device is modified") Reported-by: syzbot+106f9b687cd64ee70cd1@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=96eb7f1ce75ef933697f24eeab928c4a716edefe [1] Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20221124175134.1589053-1-syoshida@redhat.com Signed-off-by: Paolo Abeni --- drivers/net/tun.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 7a3ab3427369..24001112c323 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (tun) xdp_rxq_info_unreg(&tfile->xdp_rxq); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - sock_put(&tfile->sk); } } @@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) if (dev) netdev_state_change(dev); rtnl_unlock(); + + if (clean) + sock_put(&tfile->sk); } static void tun_detach_all(struct net_device *dev) From e493bec343fa76e95631d0e21fd4a3538aa90c56 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Fri, 25 Nov 2022 09:27:51 +0800 Subject: [PATCH 777/963] net: marvell: prestera: Fix a NULL vs IS_ERR() check in some functions rhashtable_lookup_fast() returns NULL when failed instead of error pointer. Fixes: 396b80cb5cc8 ("net: marvell: prestera: Add neighbour cache accounting") Fixes: 0a23ae237171 ("net: marvell: prestera: Add router nexthops ABI") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221125012751.23249-1-shangxiaojing@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/prestera/prestera_router.c | 2 +- drivers/net/ethernet/marvell/prestera/prestera_router_hw.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c index 4046be0e86ff..a9a1028cb17b 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c @@ -457,7 +457,7 @@ prestera_kern_neigh_cache_find(struct prestera_switch *sw, n_cache = rhashtable_lookup_fast(&sw->router->kern_neigh_cache_ht, key, __prestera_kern_neigh_cache_ht_params); - return IS_ERR(n_cache) ? NULL : n_cache; + return n_cache; } static void diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c index aa080dc57ff0..02faaea2aefa 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_router_hw.c @@ -330,7 +330,7 @@ prestera_nh_neigh_find(struct prestera_switch *sw, nh_neigh = rhashtable_lookup_fast(&sw->router->nh_neigh_ht, key, __prestera_nh_neigh_ht_params); - return IS_ERR(nh_neigh) ? NULL : nh_neigh; + return nh_neigh; } struct prestera_nh_neigh * @@ -484,7 +484,7 @@ __prestera_nexthop_group_find(struct prestera_switch *sw, nh_grp = rhashtable_lookup_fast(&sw->router->nexthop_group_ht, key, __prestera_nexthop_group_ht_params); - return IS_ERR(nh_grp) ? NULL : nh_grp; + return nh_grp; } static struct prestera_nexthop_group * From c61bfb1cb63ddab52b31cf5f1924688917e61fad Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 25 Nov 2022 17:01:41 +0800 Subject: [PATCH 778/963] mmc: mtk-sd: Fix missing clk_disable_unprepare in msdc_of_clock_parse() The clk_disable_unprepare() should be called in the error handling of devm_clk_bulk_get_optional, fix it by replacing devm_clk_get_optional and clk_prepare_enable by devm_clk_get_optional_enabled. Fixes: f5eccd94b63f ("mmc: mediatek: Add subsys clock control for MT8192 msdc") Signed-off-by: Gaosheng Cui Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221125090141.3626747-1-cuigaosheng1@huawei.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mtk-sd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; From c981cdfb9925f64a364f13c2b4f98f877308a408 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 28 Nov 2022 15:32:56 +0200 Subject: [PATCH 779/963] mmc: sdhci: Fix voltage switch delay Commit 20b92a30b561 ("mmc: sdhci: update signal voltage switch code") removed voltage switch delays from sdhci because mmc core had been enhanced to support them. However that assumed that sdhci_set_ios() did a single clock change, which it did not, and so the delays in mmc core, which should have come after the first clock change, were not effective. Fix by avoiding re-configuring UHS and preset settings when the clock is turning on and the settings have not changed. That then also avoids the associated clock changes, so that then sdhci_set_ios() does a single clock change when voltage switching, and the mmc core delays become effective. To do that has meant keeping track of driver strength (host->drv_type), and cases of reinitialization (host->reinit_uhs). Note also, the 'turning_on_clk' restriction should not be necessary but is done to minimize the impact of the change on stable kernels. Fixes: 20b92a30b561 ("mmc: sdhci: update signal voltage switch code") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Link: https://lore.kernel.org/r/20221128133259.38305-2-adrian.hunter@intel.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 61 +++++++++++++++++++++++++++++++++++----- drivers/mmc/host/sdhci.h | 2 ++ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. + */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ From 4d002d6a2a00ac1c433899bd7625c6400a74cfba Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sun, 20 Nov 2022 15:50:46 +0800 Subject: [PATCH 780/963] ieee802154: cc2520: Fix error return code in cc2520_hw_init() In cc2520_hw_init(), if oscillator start failed, the error code should be returned. Fixes: 0da6bc8cc341 ("ieee802154: cc2520: adds driver for TI CC2520 radio") Signed-off-by: Ziyang Xuan Link: https://lore.kernel.org/r/20221120075046.2213633-1-william.xuanziyang@huawei.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/cc2520.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index c69b87d3837d..edc769daad07 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) if (timeout-- <= 0) { dev_err(&priv->spi->dev, "oscillator start failed!\n"); - return ret; + return -ETIMEDOUT; } udelay(1); } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); From 1e24c54da257ab93cff5826be8a793b014a5dc9c Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 21 Nov 2022 01:22:01 +0100 Subject: [PATCH 781/963] ca8210: Fix crash by zero initializing data The struct cas_control embeds multiple generic SPI structures and we have to make sure these structures are initialized to default values. This driver does not set all attributes. When using kmalloc before some attributes were not initialized and contained random data which caused random crashes at bootup. Fixes: ded845a781a5 ("ieee802154: Add CA8210 IEEE 802.15.4 device driver") Signed-off-by: Hauke Mehrtens Link: https://lore.kernel.org/r/20221121002201.1339636-1-hauke@hauke-m.de Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/ca8210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 450b16ad40a4..e1a569b99e4a 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -885,7 +885,7 @@ static int ca8210_spi_transfer( dev_dbg(&spi->dev, "%s called\n", __func__); - cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); + cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC); if (!cas_ctl) return -ENOMEM; From dda3bbbb26c823cd54d5bf211df7db12147c9392 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 29 Nov 2022 01:30:05 -0800 Subject: [PATCH 782/963] Revert "net/mlx5e: MACsec, remove replay window size limitation in offload path" This reverts commit c0071be0e16c461680d87b763ba1ee5e46548fde. The cited commit removed the validity checks which initialized the window_sz and never removed the use of the now uninitialized variable, so now we are left with wrong value in the window size and the following clang warning: [-Wuninitialized] drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c:232:45: warning: variable 'window_sz' is uninitialized when used here MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); Revet at this time to address the clang issue due to lack of time to test the proper solution. Fixes: c0071be0e16c ("net/mlx5e: MACsec, remove replay window size limitation in offload path") Signed-off-by: Saeed Mahameed Reported-by: Jacob Keller Link: https://lore.kernel.org/r/20221129093006.378840-1-saeed@kernel.org Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/macsec.c | 16 ++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 0d6dc394a12a..f900709639f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -229,6 +229,22 @@ static int macsec_set_replay_protection(struct mlx5_macsec_obj_attrs *attrs, voi if (!attrs->replay_protect) return 0; + switch (attrs->replay_window) { + case 256: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_256BIT; + break; + case 128: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_128BIT; + break; + case 64: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_64BIT; + break; + case 32: + window_sz = MLX5_MACSEC_ASO_REPLAY_WIN_32BIT; + break; + default: + return -EINVAL; + } MLX5_SET(macsec_aso, aso_ctx, window_size, window_sz); MLX5_SET(macsec_aso, aso_ctx, mode, MLX5_MACSEC_ASO_REPLAY_PROTECTION); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 981fc7dfa408..5a4e914e2a6f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11611,6 +11611,13 @@ enum { MLX5_MACSEC_ASO_REPLAY_PROTECTION = 0x1, }; +enum { + MLX5_MACSEC_ASO_REPLAY_WIN_32BIT = 0x0, + MLX5_MACSEC_ASO_REPLAY_WIN_64BIT = 0x1, + MLX5_MACSEC_ASO_REPLAY_WIN_128BIT = 0x2, + MLX5_MACSEC_ASO_REPLAY_WIN_256BIT = 0x3, +}; + #define MLX5_MACSEC_ASO_INC_SN 0x2 #define MLX5_MACSEC_ASO_REG_C_4_5 0x2 From 0e682f04b4b59eac0b0a030251513589c4607458 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 29 Nov 2022 01:30:06 -0800 Subject: [PATCH 783/963] net/mlx5: Lag, Fix for loop when checking lag The cited commit adds a for loop to check if each port supports lag or not. But dev is not initialized correctly. Fix it by initializing dev for each iteration. Fixes: e87c6a832f88 ("net/mlx5: E-switch, Fix duplicate lag creation") Signed-off-by: Chris Mi Reported-by: Jacob Keller Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20221129093006.378840-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 4070dc1d17cb..32c3e0a649a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -700,11 +700,13 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - dev = ldev->pf[MLX5_LAG_P1].dev; - for (i = 0; i < ldev->ports; i++) + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) return false; + } + dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); for (i = 0; i < ldev->ports; i++) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) From b85f628aa158a653c006e9c1405a117baef8c868 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 28 Nov 2022 11:18:12 -0500 Subject: [PATCH 784/963] packet: do not set TP_STATUS_CSUM_VALID on CHECKSUM_COMPLETE CHECKSUM_COMPLETE signals that skb->csum stores the sum over the entire packet. It does not imply that an embedded l4 checksum field has been validated. Fixes: 682f048bd494 ("af_packet: pass checksum validation status to the user") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20221128161812.640098-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6ce8dd19f33c..1ab65f7f2a0a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2293,8 +2293,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) status |= TP_STATUS_CSUM_VALID; if (snaplen > res) @@ -3520,8 +3519,7 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (skb->ip_summed == CHECKSUM_PARTIAL) aux.tp_status |= TP_STATUS_CSUMNOTREADY; else if (skb->pkt_type != PACKET_OUTGOING && - (skb->ip_summed == CHECKSUM_COMPLETE || - skb_csum_unnecessary(skb))) + skb_csum_unnecessary(skb)) aux.tp_status |= TP_STATUS_CSUM_VALID; aux.tp_len = origlen; From 9ed7bfc79542119ac0a9e1ce8a2a5285e43433e9 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 26 Nov 2022 11:17:20 +0800 Subject: [PATCH 785/963] sctp: fix memory leak in sctp_stream_outq_migrate() When sctp_stream_outq_migrate() is called to release stream out resources, the memory pointed to by prio_head in stream out is not released. The memory leak information is as follows: unreferenced object 0xffff88801fe79f80 (size 64): comm "sctp_repo", pid 7957, jiffies 4294951704 (age 36.480s) hex dump (first 32 bytes): 80 9f e7 1f 80 88 ff ff 80 9f e7 1f 80 88 ff ff ................ 90 9f e7 1f 80 88 ff ff 90 9f e7 1f 80 88 ff ff ................ backtrace: [] kmalloc_trace+0x26/0x60 [] sctp_sched_prio_set+0x4cc/0x770 [] sctp_stream_init_ext+0xd2/0x1b0 [] sctp_sendmsg_to_asoc+0x1614/0x1a30 [] sctp_sendmsg+0xda1/0x1ef0 [] inet_sendmsg+0x9d/0xe0 [] sock_sendmsg+0xd3/0x120 [] __sys_sendto+0x23a/0x340 [] __x64_sys_sendto+0xe1/0x1b0 [] do_syscall_64+0x39/0xb0 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://syzkaller.appspot.com/bug?exrid=29c402e56c4760763cc0 Fixes: 637784ade221 ("sctp: introduce priority based stream scheduler") Reported-by: syzbot+29c402e56c4760763cc0@syzkaller.appspotmail.com Signed-off-by: Zhengchao Shao Reviewed-by: Xin Long Link: https://lore.kernel.org/r/20221126031720.378562-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- include/net/sctp/stream_sched.h | 2 ++ net/sctp/stream.c | 25 ++++++++++++++++++------- net/sctp/stream_sched.c | 5 +++++ net/sctp/stream_sched_prio.c | 19 +++++++++++++++++++ net/sctp/stream_sched_rr.c | 5 +++++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 01a70b27e026..65058faea4db 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -26,6 +26,8 @@ struct sctp_sched_ops { int (*init)(struct sctp_stream *stream); /* Init a stream */ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* free a stream */ + void (*free_sid)(struct sctp_stream *stream, __u16 sid); /* Frees the entire thing */ void (*free)(struct sctp_stream *stream); diff --git a/net/sctp/stream.c b/net/sctp/stream.c index ef9fceadef8d..ee6514af830f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -52,6 +52,19 @@ static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) } } +static void sctp_stream_free_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_sched_ops *sched; + + if (!SCTP_SO(stream, sid)->ext) + return; + + sched = sctp_sched_ops_from_stream(stream); + sched->free_sid(stream, sid); + kfree(SCTP_SO(stream, sid)->ext); + SCTP_SO(stream, sid)->ext = NULL; +} + /* Migrates chunks from stream queues to new stream queues if needed, * but not across associations. Also, removes those chunks to streams * higher than the new max. @@ -70,16 +83,14 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, * sctp_stream_update will swap ->out pointers. */ for (i = 0; i < outcnt; i++) { - kfree(SCTP_SO(new, i)->ext); + sctp_stream_free_ext(new, i); SCTP_SO(new, i)->ext = SCTP_SO(stream, i)->ext; SCTP_SO(stream, i)->ext = NULL; } } - for (i = outcnt; i < stream->outcnt; i++) { - kfree(SCTP_SO(stream, i)->ext); - SCTP_SO(stream, i)->ext = NULL; - } + for (i = outcnt; i < stream->outcnt; i++) + sctp_stream_free_ext(stream, i); } static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, @@ -174,9 +185,9 @@ void sctp_stream_free(struct sctp_stream *stream) struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; - sched->free(stream); + sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) - kfree(SCTP_SO(stream, i)->ext); + sctp_stream_free_ext(stream, i); genradix_free(&stream->out); genradix_free(&stream->in); } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 1ad565ed5627..7c8f9d89e16a 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -46,6 +46,10 @@ static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_fcfs_free(struct sctp_stream *stream) { } @@ -96,6 +100,7 @@ static struct sctp_sched_ops sctp_sched_fcfs = { .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, + .free_sid = sctp_sched_fcfs_free_sid, .free = sctp_sched_fcfs_free, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c index 80b5a2c4cbc7..4fc9f2923ed1 100644 --- a/net/sctp/stream_sched_prio.c +++ b/net/sctp/stream_sched_prio.c @@ -204,6 +204,24 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, return sctp_sched_prio_set(stream, sid, 0, gfp); } +static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head; + int i; + + if (!prio) + return; + + SCTP_SO(stream, sid)->ext->prio_head = NULL; + for (i = 0; i < stream->outcnt; i++) { + if (SCTP_SO(stream, i)->ext && + SCTP_SO(stream, i)->ext->prio_head == prio) + return; + } + + kfree(prio); +} + static void sctp_sched_prio_free(struct sctp_stream *stream) { struct sctp_stream_priorities *prio, *n; @@ -323,6 +341,7 @@ static struct sctp_sched_ops sctp_sched_prio = { .get = sctp_sched_prio_get, .init = sctp_sched_prio_init, .init_sid = sctp_sched_prio_init_sid, + .free_sid = sctp_sched_prio_free_sid, .free = sctp_sched_prio_free, .enqueue = sctp_sched_prio_enqueue, .dequeue = sctp_sched_prio_dequeue, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c index ff425aed62c7..cc444fe0d67c 100644 --- a/net/sctp/stream_sched_rr.c +++ b/net/sctp/stream_sched_rr.c @@ -90,6 +90,10 @@ static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, return 0; } +static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) +{ +} + static void sctp_sched_rr_free(struct sctp_stream *stream) { sctp_sched_rr_unsched_all(stream); @@ -177,6 +181,7 @@ static struct sctp_sched_ops sctp_sched_rr = { .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, + .free_sid = sctp_sched_rr_free_sid, .free = sctp_sched_rr_free, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, From 91a2bbfff3e3c64b3e1aa3ad04381d0572b3d543 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 28 Nov 2022 17:17:34 -0800 Subject: [PATCH 786/963] ionic: update MAINTAINERS entry Now that Pensando is a part of AMD we need to update a couple of addresses. We're keeping the mailing list address for the moment, but that will likely change in the near future. Signed-off-by: Shannon Nelson Signed-off-by: Brett Creeley Link: https://lore.kernel.org/r/20221129011734.20849-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- .mailmap | 1 + MAINTAINERS | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index fdd7989492fc..c12809090d76 100644 --- a/.mailmap +++ b/.mailmap @@ -389,6 +389,7 @@ Sebastian Reichel Sebastian Reichel Sedat Dilek Seth Forshee +Shannon Nelson Shiraz Hashim Shuah Khan Shuah Khan diff --git a/MAINTAINERS b/MAINTAINERS index 4a203c5f6c27..3480734b04d8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16139,7 +16139,8 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson +M: Shannon Nelson +M: Brett Creeley M: drivers@pensando.io L: netdev@vger.kernel.org S: Supported From 178833f99f587e9de9c888c38d82521dcfda12f0 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Tue, 29 Nov 2022 04:43:48 +0530 Subject: [PATCH 787/963] MAINTAINERS: Update maintainer list for chelsio drivers This updates the maintainers for chelsio inline crypto drivers and chelsio crypto drivers. Signed-off-by: Ayush Sawal Link: https://lore.kernel.org/r/20221128231348.8225-1-ayush.sawal@chelsio.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ---- 1 file changed, 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3480734b04d8..1873fc66d691 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5585,8 +5585,6 @@ F: drivers/scsi/cxgbi/cxgb3i CXGB4 CRYPTO DRIVER (chcr) M: Ayush Sawal -M: Vinay Kumar Yadav -M: Rohit Maheshwari L: linux-crypto@vger.kernel.org S: Supported W: http://www.chelsio.com @@ -5594,8 +5592,6 @@ F: drivers/crypto/chelsio CXGB4 INLINE CRYPTO DRIVER M: Ayush Sawal -M: Vinay Kumar Yadav -M: Rohit Maheshwari L: netdev@vger.kernel.org S: Supported W: http://www.chelsio.com From d66233a312ec9013af3e37e4030b479a20811ec3 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 28 Nov 2022 15:56:04 +0900 Subject: [PATCH 788/963] net: ethernet: renesas: ravb: Fix promiscuous mode after system resumed After system resumed on some environment board, the promiscuous mode is disabled because the SoC turned off. So, call ravb_set_rx_mode() in the ravb_resume() to fix the issue. Reported-by: Tho Vu Fixes: 0184165b2f42 ("ravb: add sleep PM suspend/resume support") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Pavan Chebbi Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/20221128065604.1864391-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 36324126db6d..6bc923326268 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -3020,6 +3020,7 @@ static int __maybe_unused ravb_resume(struct device *dev) ret = ravb_open(ndev); if (ret < 0) return ret; + ravb_set_rx_mode(ndev); netif_device_attach(ndev); } From 517e6a301f34613bff24a8e35b5455884f2d83d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 24 Nov 2022 12:49:12 +0100 Subject: [PATCH 789/963] perf: Fix perf_pending_task() UaF Per syzbot it is possible for perf_pending_task() to run after the event is free()'d. There are two related but distinct cases: - the task_work was already queued before destroying the event; - destroying the event itself queues the task_work. The first cannot be solved using task_work_cancel() since perf_release() itself might be called from a task_work (____fput), which means the current->task_works list is already empty and task_work_cancel() won't be able to find the perf_pending_task() entry. The simplest alternative is extending the perf_event lifetime to cover the task_work. The second is just silly, queueing a task_work while you know the event is going away makes no sense and is easily avoided by re-arranging how the event is marked STATE_DEAD and ensuring it goes through STATE_OFF on the way down. Reported-by: syzbot+9228d6098455bb209ec8@syzkaller.appspotmail.com Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver --- kernel/events/core.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 9d15d2d96119..ad824794c306 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event, !event->pending_work) { event->pending_work = 1; dec = false; + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) @@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event, #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL +#define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event @@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event, update_cgrp_time_from_cpuctx(cpuctx, false); } + /* + * Ensure event_sched_out() switches to OFF, at the very least + * this avoids raising perf_pending_task() at this time. + */ + if (flags & DETACH_DEAD) + event->pending_disable = 1; event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); + if (flags & DETACH_DEAD) + event->state = PERF_EVENT_STATE_DEAD; if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) @@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event) ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); - perf_remove_from_context(event, DETACH_GROUP); - raw_spin_lock_irq(&ctx->lock); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. @@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event) * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. */ - event->state = PERF_EVENT_STATE_DEAD; - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); @@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head) if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); + + put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS From b97f4c68b7f4c2422b9f1e3a657315294b7614c1 Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Tue, 22 Nov 2022 14:32:14 -0800 Subject: [PATCH 790/963] ANDROID: Add ddk_headers for arm architecture. Similar to aarch64 and x86_64, we also add ddk_headers for arm so we can build DDK (Driver development kit) modules for the arm architecture for virtual devices. Test: Treehugger Bug: 254735056 Change-Id: I7ade4a6053e59d84b825285fbc6162b6e642682e Signed-off-by: Yifan Hong --- BUILD.bazel | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/BUILD.bazel b/BUILD.bazel index 2a3ff0dd24c0..ed296689b215 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -367,6 +367,15 @@ ddk_headers( visibility = ["//visibility:public"], ) +ddk_headers( + name = "all_headers_arm", + hdrs = [":all_headers_allowlist_arm"] + select({ + "//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], +) + ddk_headers( name = "all_headers_x86_64", hdrs = [":all_headers_allowlist_x86_64"] + select({ @@ -399,6 +408,24 @@ ddk_headers( visibility = ["//visibility:private"], ) +ddk_headers( + name = "all_headers_allowlist_arm", + hdrs = [ + ":all_headers_allowlist_arm_globs", + ":all_headers_allowlist_common_globs", + ], + # The list of include directories where source files can #include headers + # from. In other words, these are the `-I` option to the C compiler. + # These are prepended to LINUXINCLUDE. + linux_includes = [ + "arch/arm/include", + "arch/arm/include/uapi", + "include", + "include/uapi", + ], + visibility = ["//visibility:private"], +) + ddk_headers( name = "all_headers_allowlist_x86_64", hdrs = [ @@ -423,6 +450,13 @@ ddk_headers( # These are separate filegroup targets so the all_headers_allowlist_* are # more friendly to batch BUILD file update tools like buildozer. +# globs() for arm only +filegroup( + name = "all_headers_allowlist_arm_globs", + srcs = glob(["arch/arm/include/**/*.h"]), + visibility = ["//visibility:private"], +) + # globs() for arm64 only filegroup( name = "all_headers_allowlist_aarch64_globs", From 7e1864332fbc1b993659eab7974da9fe8bf8c128 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sun, 30 Oct 2022 20:45:17 +0800 Subject: [PATCH 791/963] riscv: fix race when vmap stack overflow Currently, when detecting vmap stack overflow, riscv firstly switches to the so called shadow stack, then use this shadow stack to call the get_overflow_stack() to get the overflow stack. However, there's a race here if two or more harts use the same shadow stack at the same time. To solve this race, we introduce spin_shadow_stack atomic var, which will be swap between its own address and 0 in atomic way, when the var is set, it means the shadow_stack is being used; when the var is cleared, it means the shadow_stack isn't being used. Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") Signed-off-by: Jisheng Zhang Suggested-by: Guo Ren Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20221030124517.2370-1-jszhang@kernel.org [Palmer: Add AQ to the swap, and also some comments.] Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/asm.h | 1 + arch/riscv/kernel/entry.S | 13 +++++++++++++ arch/riscv/kernel/traps.c | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 618d7c5af1a2..e15a1c9f1cf8 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 98f502654edd..5fdb6ba09600 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -387,6 +387,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index bb6a450f0ecc..be54ccea8c47 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -213,11 +213,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. + */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); From 74f6bb55c834da6d4bac24f44868202743189b2b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 3 Nov 2022 01:02:54 +0800 Subject: [PATCH 792/963] riscv: vdso: fix section overlapping under some conditions lkp reported a build error, I tried the config and can reproduce build error as below: VDSOLD arch/riscv/kernel/vdso/vdso.so.dbg ld.lld: error: section .note file range overlaps with .text >>> .note range is [0x7C8, 0x803] >>> .text range is [0x800, 0x1993] ld.lld: error: section .text file range overlaps with .dynamic >>> .text range is [0x800, 0x1993] >>> .dynamic range is [0x808, 0x937] ld.lld: error: section .note virtual address range overlaps with .text >>> .note range is [0x7C8, 0x803] >>> .text range is [0x800, 0x1993] Fix it by setting DISABLE_BRANCH_PROFILING which will disable branch tracing for vdso, thus avoid useless _ftrace_annotated_branch section and _ftrace_branch section. Although we can also fix it by removing the hardcoded .text begin address, but I think that's another story and should be put into another patch. Link: https://lore.kernel.org/lkml/202210122123.Cc4FPShJ-lkp@intel.com/#r Reported-by: kernel test robot Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221102170254.1925-1-jszhang@kernel.org Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index db6548509bb3..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) From 6fdd5d2f8c2f54b7fad4ff4df2a19542aeaf6102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 15 Nov 2022 10:06:40 +0100 Subject: [PATCH 793/963] riscv: mm: Proper page permissions after initmem free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 64-bit RISC-V kernels have the kernel image mapped separately to alias the linear map. The linear map and the kernel image map are documented as "direct mapping" and "kernel" respectively in [1]. At image load time, the linear map corresponding to the kernel image is set to PAGE_READ permission, and the kernel image map is set to PAGE_READ|PAGE_EXEC. When the initmem is freed, the pages in the linear map should be restored to PAGE_READ|PAGE_WRITE, whereas the corresponding pages in the kernel image map should be restored to PAGE_READ, by removing the PAGE_EXEC permission. This is not the case. For 64-bit kernels, only the linear map is restored to its proper page permissions at initmem free, and not the kernel image map. In practise this results in that the kernel can potentially jump to dead __init code, and start executing invalid instructions, without getting an exception. Restore the freed initmem properly, by setting both the kernel image map to the correct permissions. [1] Documentation/riscv/vm-layout.rst Fixes: e5c35fa04019 ("riscv: Map the kernel with correct permissions the first time") Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20221115090641.258476-1-bjorn@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 67ec1fadcfe2..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? - set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } From b17d19a5314a37f7197afd1a0200affd21a7227d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 20 Oct 2022 10:16:02 -0400 Subject: [PATCH 794/963] riscv: kexec: Fixup irq controller broken in kexec crash path If a crash happens on cpu3 and all interrupts are binding on cpu0, the bad irq routing will cause a crash kernel which can't receive any irq. Because crash kernel won't clean up all harts' PLIC enable bits in enable registers. This patch is similar to 9141a003a491 ("ARM: 7316/1: kexec: EOI active and mask all interrupts in kexec crash path") and 78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()"), and PowerPC also has the same mechanism. Fixes: fba8a8674f68 ("RISC-V: Add kexec support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Reviewed-by: Xianting Tian Cc: Nick Kossifidis Cc: Palmer Dabbelt Link: https://lore.kernel.org/r/20221020141603.2856206-2-guoren@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/machine_kexec.c | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..db41c676e5a2 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include /* For unreachable() */ #include /* For cpu_down() */ #include +#include +#include /* * kexec_image_info - Print received image details @@ -154,6 +156,37 @@ void crash_smp_send_stop(void) cpus_stopped = 1; } +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * @@ -169,6 +202,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } From 9b932aadfc47de5d70b53ea04b0d1b5f6c82945b Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 20 Oct 2022 10:16:03 -0400 Subject: [PATCH 795/963] riscv: kexec: Fixup crash_smp_send_stop without multi cores Current crash_smp_send_stop is the same as the generic one in kernel/panic and misses crash_save_cpu in percpu. This patch is inspired by 78fd584cdec0 ("arm64: kdump: implement machine_crash_shutdown()") and adds the same mechanism for riscv. Before this patch, test result: crash> help -r CPU 0: [OFFLINE] CPU 1: epc : ffffffff80009ff0 ra : ffffffff800b789a sp : ff2000001098bb40 gp : ffffffff815fca60 tp : ff60000004680000 t0 : 6666666666663c5b t1 : 0000000000000000 t2 : 666666666666663c s0 : ff2000001098bc90 s1 : ffffffff81600798 a0 : ff2000001098bb48 a1 : 0000000000000000 a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 a5 : ff60000004690800 a6 : 0000000000000000 a7 : 0000000000000000 s2 : ff2000001098bb48 s3 : ffffffff81093ec8 s4 : ffffffff816004ac s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f720 s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700 s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097 t5 : ffffffff819a8098 t6 : ff2000001098b9a8 CPU 2: [OFFLINE] CPU 3: [OFFLINE] After this patch, test result: crash> help -r CPU 0: epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ffffffff81403eb0 gp : ffffffff815fcb48 tp : ffffffff81413400 t0 : 0000000000000000 t1 : 0000000000000000 t2 : 0000000000000000 s0 : ffffffff81403ec0 s1 : 0000000000000000 a0 : 0000000000000000 a1 : 0000000000000000 a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 s8 : 0000000000000000 s9 : 0000000080039eac s10: 0000000000000000 s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 t5 : 0000000000000000 t6 : 0000000000000000 CPU 1: epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff2000000068bf30 gp : ffffffff815fcb48 tp : ff6000000240d400 t0 : 0000000000000000 t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff2000000068bf40 s1 : 0000000000000001 a0 : 0000000000000000 a1 : 0000000000000000 a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 s8 : 0000000000000000 s9 : 0000000080039ea8 s10: 0000000000000000 s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 t5 : 0000000000000000 t6 : 0000000000000000 CPU 2: epc : ffffffff80003f34 ra : ffffffff808caa7c sp : ff20000000693f30 gp : ffffffff815fcb48 tp : ff6000000240e900 t0 : 0000000000000000 t1 : 0000000000000000 t2 : 0000000000000000 s0 : ff20000000693f40 s1 : 0000000000000002 a0 : 0000000000000000 a1 : 0000000000000000 a2 : 0000000000000000 a3 : 0000000000000000 a4 : 0000000000000000 a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000000000000 s2 : ffffffff816001c8 s3 : ffffffff81600370 s4 : ffffffff80c32e18 s5 : ffffffff819d3018 s6 : ffffffff810e2110 s7 : 0000000000000000 s8 : 0000000000000000 s9 : 0000000080039eb0 s10: 0000000000000000 s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000000000000000 t5 : 0000000000000000 t6 : 0000000000000000 CPU 3: epc : ffffffff8000a1e4 ra : ffffffff800b7bba sp : ff200000109bbb40 gp : ffffffff815fcb48 tp : ff6000000373aa00 t0 : 6666666666663c5b t1 : 0000000000000000 t2 : 666666666666663c s0 : ff200000109bbc90 s1 : ffffffff816007a0 a0 : ff200000109bbb48 a1 : 0000000000000000 a2 : 0000000000000000 a3 : 0000000000000001 a4 : 0000000000000000 a5 : ff60000002c61c00 a6 : 0000000000000000 a7 : 0000000000000000 s2 : ff200000109bbb48 s3 : ffffffff810941a8 s4 : ffffffff816004b4 s5 : 0000000000000000 s6 : 0000000000000007 s7 : ffffffff80e7f7a0 s8 : 00fffffffffff3f0 s9 : 0000000000000007 s10: 00aaaaaaaab98700 s11: 0000000000000001 t3 : ffffffff819a8097 t4 : ffffffff819a8097 t5 : ffffffff819a8098 t6 : ff200000109bb9a8 Fixes: ad943893d5f1 ("RISC-V: Fixup schedule out issue in machine_crash_shutdown()") Reviewed-by: Xianting Tian Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Nick Kossifidis Link: https://lore.kernel.org/r/20221020141603.2856206-3-guoren@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/smp.h | 3 + arch/riscv/kernel/machine_kexec.c | 21 ++----- arch/riscv/kernel/smp.c | 97 ++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 18 deletions(-) diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index db41c676e5a2..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -140,22 +140,6 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) -{ - static int cpus_stopped; - - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; - - smp_send_stop(); - cpus_stopped = 1; -} - static void machine_kexec_mask_interrupts(void) { unsigned int i; @@ -230,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -22,11 +23,13 @@ #include #include #include +#include enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); From 9464d0b68f11a9bc768370c3260ec02b3550447b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 24 Nov 2022 12:21:46 -0500 Subject: [PATCH 796/963] netfilter: conntrack: fix using __this_cpu_add in preemptible Currently in nf_conntrack_hash_check_insert(), when it fails in nf_ct_ext_valid_pre/post(), NF_CT_STAT_INC() will be called in the preemptible context, a call trace can be triggered: BUG: using __this_cpu_add() in preemptible [00000000] code: conntrack/1636 caller is nf_conntrack_hash_check_insert+0x45/0x430 [nf_conntrack] Call Trace: dump_stack_lvl+0x33/0x46 check_preemption_disabled+0xc3/0xf0 nf_conntrack_hash_check_insert+0x45/0x430 [nf_conntrack] ctnetlink_create_conntrack+0x3cd/0x4e0 [nf_conntrack_netlink] ctnetlink_new_conntrack+0x1c0/0x450 [nf_conntrack_netlink] nfnetlink_rcv_msg+0x277/0x2f0 [nfnetlink] netlink_rcv_skb+0x50/0x100 nfnetlink_rcv+0x65/0x144 [nfnetlink] netlink_unicast+0x1ae/0x290 netlink_sendmsg+0x257/0x4f0 sock_sendmsg+0x5f/0x70 This patch is to fix it by changing to use NF_CT_STAT_INC_ATOMIC() for nf_ct_ext_valid_pre/post() check in nf_conntrack_hash_check_insert(), as well as nf_ct_ext_valid_post() in __nf_conntrack_confirm(). Note that nf_ct_ext_valid_pre() check in __nf_conntrack_confirm() is safe to use NF_CT_STAT_INC(), as it's under local_bh_disable(). Fixes: c56716c69ce1 ("netfilter: extensions: introduce extension genid count") Signed-off-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2692139ce417..23b3fedd619a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -891,7 +891,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); if (!nf_ct_ext_valid_pre(ct->ext)) { - NF_CT_STAT_INC(net, insert_failed); + NF_CT_STAT_INC_ATOMIC(net, insert_failed); return -ETIMEDOUT; } @@ -938,7 +938,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return -ETIMEDOUT; } @@ -1275,7 +1275,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } From 1feeae071507ad65cf9f462a1bdd543a4bf89e71 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 28 Nov 2022 10:58:53 +0100 Subject: [PATCH 797/963] netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark All warnings (new ones prefixed by >>): net/netfilter/nf_conntrack_netlink.c: In function '__ctnetlink_glue_build': >> net/netfilter/nf_conntrack_netlink.c:2674:13: warning: unused variable 'mark' [-Wunused-variable] 2674 | u32 mark; | ^~~~ Fixes: 52d1aa8b8249 ("netfilter: conntrack: Fix data-races around ct mark") Reported-by: kernel test robot Tested-by: Ivan Babrou Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d71150a40fb0..1286ae7d4609 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,8 +328,13 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { + u32 mark = READ_ONCE(ct->mark); + + if (!mark) + return 0; + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -543,7 +548,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -722,7 +727,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; - u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -827,9 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if ((events & (1 << IPCT_MARK) || mark) && - ctnetlink_dump_mark(skb, mark) < 0) + if (events & (1 << IPCT_MARK) && + ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -2671,7 +2674,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; - u32 mark; zone = nf_ct_zone(ct); @@ -2733,8 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if (mark && ctnetlink_dump_mark(skb, mark) < 0) + if (ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) From a56ea6147facce4ac1fc38675455f9733d96232b Mon Sep 17 00:00:00 2001 From: Lei Rao Date: Tue, 29 Nov 2022 17:48:11 +0800 Subject: [PATCH 798/963] nvme-pci: clear the prp2 field when not used If the prp2 field is not filled in nvme_setup_prp_simple(), the prp2 field is garbage data. According to nvme spec, the prp2 is reserved if the data transfer does not cross a memory page boundary, so clear it to zero if it is not used. Signed-off-by: Lei Rao Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } From 899d2a05dc14733cfba6224083c6b0dd5a738590 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Fri, 18 Nov 2022 16:27:56 -0700 Subject: [PATCH 799/963] nvme: fix SRCU protection of nvme_ns_head list Walking the nvme_ns_head siblings list is protected by the head's srcu in nvme_ns_head_submit_bio() but not nvme_mpath_revalidate_paths(). Removing namespaces from the list also fails to synchronize the srcu. Concurrent scan work can therefore cause use-after-frees. Hold the head's srcu lock in nvme_mpath_revalidate_paths() and synchronize with the srcu, not the global RCU, in nvme_ns_remove(). Observed the following panic when making NVMe/RDMA connections with native multipath on the Rocky Linux 8.6 kernel (it seems the upstream kernel has the same race condition). Disassembly shows the faulting instruction is cmp 0x50(%rdx),%rcx; computing capacity != get_capacity(ns->disk). Address 0x50 is dereferenced because ns->disk is NULL. The NULL disk appears to be the result of concurrent scan work freeing the namespace (note the log line in the middle of the panic). [37314.206036] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [37314.206036] nvme0n3: detected capacity change from 0 to 11811160064 [37314.299753] PGD 0 P4D 0 [37314.299756] Oops: 0000 [#1] SMP PTI [37314.299759] CPU: 29 PID: 322046 Comm: kworker/u98:3 Kdump: loaded Tainted: G W X --------- - - 4.18.0-372.32.1.el8test86.x86_64 #1 [37314.299762] Hardware name: Dell Inc. PowerEdge R720/0JP31P, BIOS 2.7.0 05/23/2018 [37314.299763] Workqueue: nvme-wq nvme_scan_work [nvme_core] [37314.299783] RIP: 0010:nvme_mpath_revalidate_paths+0x26/0xb0 [nvme_core] [37314.299790] Code: 1f 44 00 00 66 66 66 66 90 55 53 48 8b 5f 50 48 8b 83 c8 c9 00 00 48 8b 13 48 8b 48 50 48 39 d3 74 20 48 8d 42 d0 48 8b 50 20 <48> 3b 4a 50 74 05 f0 80 60 70 ef 48 8b 50 30 48 8d 42 d0 48 39 d3 [37315.058803] RSP: 0018:ffffabe28f913d10 EFLAGS: 00010202 [37315.121316] RAX: ffff927a077da800 RBX: ffff92991dd70000 RCX: 0000000001600000 [37315.206704] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff92991b719800 [37315.292106] RBP: ffff929a6b70c000 R08: 000000010234cd4a R09: c0000000ffff7fff [37315.377501] R10: 0000000000000001 R11: ffffabe28f913a30 R12: 0000000000000000 [37315.462889] R13: ffff92992716600c R14: ffff929964e6e030 R15: ffff92991dd70000 [37315.548286] FS: 0000000000000000(0000) GS:ffff92b87fb80000(0000) knlGS:0000000000000000 [37315.645111] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [37315.713871] CR2: 0000000000000050 CR3: 0000002208810006 CR4: 00000000000606e0 [37315.799267] Call Trace: [37315.828515] nvme_update_ns_info+0x1ac/0x250 [nvme_core] [37315.892075] nvme_validate_or_alloc_ns+0x2ff/0xa00 [nvme_core] [37315.961871] ? __blk_mq_free_request+0x6b/0x90 [37316.015021] nvme_scan_work+0x151/0x240 [nvme_core] [37316.073371] process_one_work+0x1a7/0x360 [37316.121318] ? create_worker+0x1a0/0x1a0 [37316.168227] worker_thread+0x30/0x390 [37316.212024] ? create_worker+0x1a0/0x1a0 [37316.258939] kthread+0x10a/0x120 [37316.297557] ? set_kthread_struct+0x50/0x50 [37316.347590] ret_from_fork+0x35/0x40 [37316.390360] Modules linked in: nvme_rdma nvme_tcp(X) nvme_fabrics nvme_core netconsole iscsi_tcp libiscsi_tcp dm_queue_length dm_service_time nf_conntrack_netlink br_netfilter bridge stp llc overlay nft_chain_nat ipt_MASQUERADE nf_nat xt_addrtype xt_CT nft_counter xt_state xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 xt_comment xt_multiport nft_compat nf_tables libcrc32c nfnetlink dm_multipath tg3 rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm intel_rapl_msr iTCO_wdt iTCO_vendor_support dcdbas intel_rapl_common sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ipmi_ssif kvm irqbypass crct10dif_pclmul crc32_pclmul mlx5_ib ghash_clmulni_intel ib_uverbs rapl intel_cstate intel_uncore ib_core ipmi_si joydev mei_me pcspkr ipmi_devintf mei lpc_ich wmi ipmi_msghandler acpi_power_meter ext4 mbcache jbd2 sd_mod t10_pi sg mgag200 mlx5_core drm_kms_helper syscopyarea [37316.390419] sysfillrect ahci sysimgblt fb_sys_fops libahci drm crc32c_intel libata mlxfw pci_hyperv_intf tls i2c_algo_bit psample dm_mirror dm_region_hash dm_log dm_mod fuse [last unloaded: nvme_core] [37317.645908] CR2: 0000000000000050 Fixes: e7d65803e2bb ("nvme-multipath: revalidate paths during rescan") Signed-off-by: Caleb Sander Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..69e333922bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); From 8c115864501fc09932cdfec53d9ec1cde82b4a28 Mon Sep 17 00:00:00 2001 From: Qiqi Zhang Date: Fri, 25 Nov 2022 18:45:58 +0800 Subject: [PATCH 800/963] drm/bridge: ti-sn65dsi86: Fix output polarity setting bug According to the description in ti-sn65dsi86's datasheet: CHA_HSYNC_POLARITY: 0 = Active High Pulse. Synchronization signal is high for the sync pulse width. (default) 1 = Active Low Pulse. Synchronization signal is low for the sync pulse width. CHA_VSYNC_POLARITY: 0 = Active High Pulse. Synchronization signal is high for the sync pulse width. (Default) 1 = Active Low Pulse. Synchronization signal is low for the sync pulse width. We should only set these bits when the polarity is negative. Fixes: a095f15c00e2 ("drm/bridge: add support for sn65dsi86 bridge driver") Signed-off-by: Qiqi Zhang Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Reviewed-by: Tomi Valkeinen Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20221125104558.84616-1-eddy.zhang@rock-chips.com --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 3c3561942eb6..eb24322df721 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -931,9 +931,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata) &pdata->bridge.encoder->crtc->state->adjusted_mode; u8 hsync_polarity = 0, vsync_polarity = 0; - if (mode->flags & DRM_MODE_FLAG_PHSYNC) + if (mode->flags & DRM_MODE_FLAG_NHSYNC) hsync_polarity = CHA_HSYNC_POLARITY; - if (mode->flags & DRM_MODE_FLAG_PVSYNC) + if (mode->flags & DRM_MODE_FLAG_NVSYNC) vsync_polarity = CHA_VSYNC_POLARITY; ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG, From 9b84f0f74d0d716e3fd18dc428ac111266ef5844 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 30 Nov 2022 22:06:04 +0900 Subject: [PATCH 801/963] ALSA: dice: fix regression for Lexicon I-ONIX FW810S For Lexicon I-ONIX FW810S, the call of ioctl(2) with SNDRV_PCM_IOCTL_HW_PARAMS can returns -ETIMEDOUT. This is a regression due to the commit 41319eb56e19 ("ALSA: dice: wait just for NOTIFY_CLOCK_ACCEPTED after GLOBAL_CLOCK_SELECT operation"). The device does not emit NOTIFY_CLOCK_ACCEPTED notification when accepting GLOBAL_CLOCK_SELECT operation with the same parameters as current ones. This commit fixes the regression. When receiving no notification, return -ETIMEDOUT as long as operating for any change. Fixes: 41319eb56e19 ("ALSA: dice: wait just for NOTIFY_CLOCK_ACCEPTED after GLOBAL_CLOCK_SELECT operation") Cc: Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20221130130604.29774-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/dice/dice-stream.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - ®, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } From e542baf30b48605d4336bf54b98e76b8fb98af30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Nov 2022 11:14:35 -0500 Subject: [PATCH 802/963] KVM: x86: fix uninitialized variable use on KVM_REQ_TRIPLE_FAULT If a triple fault was fixed by kvm_x86_ops.nested_ops->triple_fault (by turning it into a vmexit), there is no need to leave vcpu_enter_guest(). Any vcpu->requests will be caught later before the actual vmentry, and in fact vcpu_enter_guest() was not initializing the "r" variable. Depending on the compiler's whims, this could cause the x86_64/triple_fault_event_test test to fail. Cc: Maxim Levitsky Fixes: 92e7d5c83aff ("KVM: x86: allow L1 to not intercept triple fault") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2835bd796639..69227f77b201 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10574,8 +10574,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; + goto out; } - goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ From 45fecdb9f658d9c82960c98240bc0770ade19aca Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Tue, 22 Nov 2022 20:35:08 +0800 Subject: [PATCH 803/963] gpio: amd8111: Fix PCI device reference count leak for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() after the 'out' label. Since pci_dev_put() can handle NULL input parameter, there is no problem for the 'Device not found' branch. For the normal path, add pci_dev_put() in amd_gpio_exit(). Fixes: f942a7de047d ("gpio: add a driver for GPIO pins found on AMD-8111 south bridge chips") Signed-off-by: Xiongfeng Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-amd8111.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index 14e6b3e64add..6f3ded619c8b 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -226,7 +226,10 @@ static int __init amd_gpio_init(void) ioport_unmap(gp.pm); goto out; } + return 0; + out: + pci_dev_put(pdev); return err; } @@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void) { gpiochip_remove(&gp.chip); ioport_unmap(gp.pm); + pci_dev_put(gp.pdev); } module_init(amd_gpio_init); From 04aa64375f48a5d430b5550d9271f8428883e550 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 14 Nov 2022 11:38:24 +0100 Subject: [PATCH 804/963] drm/i915: fix TLB invalidation for Gen12 video and compute engines In case of Gen12 video and compute engines, TLB_INV registers are masked - to modify one bit, corresponding bit in upper half of the register must be enabled, otherwise nothing happens. CVE: CVE-2022-4139 Suggested-by: Chris Wilson Signed-off-by: Andrzej Hajda Acked-by: Daniel Vetter Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/gt/intel_gt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..5c931b6696c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1017,6 +1017,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); awake |= engine->mask; } From ef4d3ea40565a781c25847e9cb96c1bd9f462bc6 Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Wed, 30 Nov 2022 17:55:51 +0000 Subject: [PATCH 805/963] afs: Fix server->active leak in afs_put_server The atomic_read was accidentally replaced with atomic_inc_return, which prevents the server from getting cleaned up and causes rmmod to hang with a warning: Can't purge s=00000001 Fixes: 2757a4dc1849 ("afs: Fix access after dec in put functions") Signed-off-by: Marc Dionne Signed-off-by: David Howells Link: https://lore.kernel.org/r/20221130174053.2665818-1-marc.dionne@auristor.com/ Signed-off-by: Linus Torvalds --- fs/afs/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) From eed913f6919e253f35d454b2f115f2a4db2b741a Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Oct 2022 22:00:00 +0900 Subject: [PATCH 806/963] e1000e: Fix TX dispatch condition e1000_xmit_frame is expected to stop the queue and dispatch frames to hardware if there is not sufficient space for the next frame in the buffer, but sometimes it failed to do so because the estimated maximum size of frame was wrong. As the consequence, the later invocation of e1000_xmit_frame failed with NETDEV_TX_BUSY, and the frame in the buffer remained forever, resulting in a watchdog failure. This change fixes the estimated size by making it match with the condition for NETDEV_TX_BUSY. Apparently, the old estimation failed to account for the following lines which determines the space requirement for not causing NETDEV_TX_BUSY: ``` /* reserve a descriptor for the offload context */ if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) count++; count++; count += DIV_ROUND_UP(len, adapter->tx_fifo_limit); ``` This issue was found when running http-stress02 test included in Linux Test Project 20220930 on QEMU with the following commandline: ``` qemu-system-x86_64 -M q35,accel=kvm -m 8G -smp 8 -drive if=virtio,format=raw,file=root.img,file.locking=on -device e1000e,netdev=netdev -netdev tap,script=ifup,downscript=no,id=netdev ``` Fixes: bc7f75fa9788 ("[E1000E]: New pci-express e1000 driver (currently for ICH9 devices only)") Signed-off-by: Akihiko Odaki Tested-by: Gurucharan G (A Contingent worker at Intel) Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 49e926959ad3..55cf2f62bb30 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5936,9 +5936,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, e1000_tx_queue(tx_ring, tx_flags, count); /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(tx_ring, - (MAX_SKB_FRAGS * + ((MAX_SKB_FRAGS + 1) * DIV_ROUND_UP(PAGE_SIZE, - adapter->tx_fifo_limit) + 2)); + adapter->tx_fifo_limit) + 4)); if (!netdev_xmit_more() || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { From 28e96556baca7056d11d9fb3cdd0aba4483e00d8 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 25 Nov 2022 22:30:31 +0900 Subject: [PATCH 807/963] igb: Allocate MSI-X vector when testing Without this change, the interrupt test fail with MSI-X environment: $ sudo ethtool -t enp0s2 offline [ 43.921783] igb 0000:00:02.0: offline testing starting [ 44.855824] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Down [ 44.961249] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX [ 51.272202] igb 0000:00:02.0: testing shared interrupt [ 56.996975] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 4 Loopback test (offline) 0 Link test (on/offline) 0 Here, "4" means an expected interrupt was not delivered. To fix this, route IRQs correctly to the first MSI-X vector by setting IVAR_MISC. Also, set bit 0 of EIMS so that the vector will not be masked. The interrupt test now runs properly with this change: $ sudo ethtool -t enp0s2 offline [ 42.762985] igb 0000:00:02.0: offline testing starting [ 50.141967] igb 0000:00:02.0: testing shared interrupt [ 56.163957] igb 0000:00:02.0 enp0s2: igb: enp0s2 NIC Link is Up 1000 Mbps Full Duplex, Flow Control: RX/TX The test result is PASS The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 0 Fixes: 4eefa8f01314 ("igb: add single vector msi-x testing to interrupt test") Signed-off-by: Akihiko Odaki Reviewed-by: Maciej Fijalkowski Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index e5f3e7680dc6..ff911af16a4b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) *data = 1; return -1; } + wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); + wr32(E1000_EIMS, BIT(0)); } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, From dec1d352de5c6e2bcdbb03a2e7a84d85ad2e4f14 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 8 Nov 2022 10:43:57 -0800 Subject: [PATCH 808/963] mm: replace VM_WARN_ON to pr_warn if the node is offline with __GFP_THISNODE Syzbot reported the below splat: WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 __alloc_pages_node include/linux/gfp.h:221 [inline] WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 hpage_collapse_alloc_page mm/khugepaged.c:807 [inline] WARNING: CPU: 1 PID: 3646 at include/linux/gfp.h:221 alloc_charge_hpage+0x802/0xaa0 mm/khugepaged.c:963 Modules linked in: CPU: 1 PID: 3646 Comm: syz-executor210 Not tainted 6.1.0-rc1-syzkaller-00454-ga70385240892 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:__alloc_pages_node include/linux/gfp.h:221 [inline] RIP: 0010:hpage_collapse_alloc_page mm/khugepaged.c:807 [inline] RIP: 0010:alloc_charge_hpage+0x802/0xaa0 mm/khugepaged.c:963 Code: e5 01 4c 89 ee e8 6e f9 ae ff 4d 85 ed 0f 84 28 fc ff ff e8 70 fc ae ff 48 8d 6b ff 4c 8d 63 07 e9 16 fc ff ff e8 5e fc ae ff <0f> 0b e9 96 fa ff ff 41 bc 1a 00 00 00 e9 86 fd ff ff e8 47 fc ae RSP: 0018:ffffc90003fdf7d8 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888077f457c0 RSI: ffffffff81cd8f42 RDI: 0000000000000001 RBP: ffff888079388c0c R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: dffffc0000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f6b48ccf700(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f6b48a819f0 CR3: 00000000171e7000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: collapse_file+0x1ca/0x5780 mm/khugepaged.c:1715 hpage_collapse_scan_file+0xd6c/0x17a0 mm/khugepaged.c:2156 madvise_collapse+0x53a/0xb40 mm/khugepaged.c:2611 madvise_vma_behavior+0xd0a/0x1cc0 mm/madvise.c:1066 madvise_walk_vmas+0x1c7/0x2b0 mm/madvise.c:1240 do_madvise.part.0+0x24a/0x340 mm/madvise.c:1419 do_madvise mm/madvise.c:1432 [inline] __do_sys_madvise mm/madvise.c:1432 [inline] __se_sys_madvise mm/madvise.c:1430 [inline] __x64_sys_madvise+0x113/0x150 mm/madvise.c:1430 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6b48a4eef9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 b1 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f6b48ccf318 EFLAGS: 00000246 ORIG_RAX: 000000000000001c RAX: ffffffffffffffda RBX: 00007f6b48af0048 RCX: 00007f6b48a4eef9 RDX: 0000000000000019 RSI: 0000000000600003 RDI: 0000000020000000 RBP: 00007f6b48af0040 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f6b48aa53a4 R13: 00007f6b48bffcbf R14: 00007f6b48ccf400 R15: 0000000000022000 It is because khugepaged allocates pages with __GFP_THISNODE, but the preferred node is bogus. The previous patch fixed the khugepaged code to avoid allocating page from non-existing node. But it is still racy against memory hotremove. There is no synchronization with the memory hotplug so it is possible that memory gets offline during a longer taking scanning. So this warning still seems not quite helpful because: * There is no guarantee the node is online for __GFP_THISNODE context for all the callsites. * Kernel just fails the allocation regardless the warning, and it looks all callsites handle the allocation failure gracefully. Although while the warning has helped to identify a buggy code, it is not safe in general and this warning could panic the system with panic-on-warn configuration which tends to be used surprisingly often. So replace VM_WARN_ON to pr_warn(). And the warning will be triggered if __GFP_NOWARN is set since the allocator would print out warning for such case if __GFP_NOWARN is not set. [shy828301@gmail.com: rename nid to this_node and gfp to warn_gfp] Link: https://lkml.kernel.org/r/20221123193014.153983-1-shy828301@gmail.com [akpm@linux-foundation.org: fix whitespace] [akpm@linux-foundation.org: print gfp_mask instead of warn_gfp, per Michel] Link: https://lkml.kernel.org/r/20221108184357.55614-3-shy828301@gmail.com Fixes: 7d8faaf15545 ("mm/madvise: introduce MADV_COLLAPSE sync hugepage collapse") Signed-off-by: Yang Shi Reported-by: Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/gfp.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). @@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } From 21b85b09527c28e242db55c1b751f7f7549b830c Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 14 Nov 2022 15:55:05 -0800 Subject: [PATCH 809/963] madvise: use zap_page_range_single for madvise dontneed This series addresses the issue first reported in [1], and fully described in patch 2. Patches 1 and 2 address the user visible issue and are tagged for stable backports. While exploring solutions to this issue, related problems with mmu notification calls were discovered. This is addressed in the patch "hugetlb: remove duplicate mmu notifications:". Since there are no user visible effects, this third is not tagged for stable backports. Previous discussions suggested further cleanup by removing the routine zap_page_range. This is possible because zap_page_range_single is now exported, and all callers of zap_page_range pass ranges entirely within a single vma. This work will be done in a later patch so as not to distract from this bug fix. [1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6JudD0g@mail.gmail.com/ This patch (of 2): Expose the routine zap_page_range_single to zap a range within a single vma. The madvise routine madvise_dontneed_single_vma can use this routine as it explicitly operates on a single vma. Also, update the mmu notification range in zap_page_range_single to take hugetlb pmd sharing into account. This is required as MADV_DONTNEED supports hugetlb vmas. Link: https://lkml.kernel.org/r/20221114235507.294320-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20221114235507.294320-2-mike.kravetz@oracle.com Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings") Signed-off-by: Mike Kravetz Reported-by: Wei Chen Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Mina Almasry Cc: Nadav Amit Cc: Naoya Horiguchi Cc: Peter Xu Cc: Rik van Riel Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 27 +++++++++++++++++++-------- mm/madvise.c | 6 +++--- mm/memory.c | 23 +++++++++++------------ 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..cbfb489d381c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,23 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1886,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3486,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. - */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memory.c b/mm/memory.c index 8a6d5c823f91..9bc5edc35725 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } From 04ada095dcfc4ae359418053c0be94453bdf1e84 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Mon, 14 Nov 2022 15:55:06 -0800 Subject: [PATCH 810/963] hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing madvise(MADV_DONTNEED) ends up calling zap_page_range() to clear page tables associated with the address range. For hugetlb vmas, zap_page_range will call __unmap_hugepage_range_final. However, __unmap_hugepage_range_final assumes the passed vma is about to be removed and deletes the vma_lock to prevent pmd sharing as the vma is on the way out. In the case of madvise(MADV_DONTNEED) the vma remains, but the missing vma_lock prevents pmd sharing and could potentially lead to issues with truncation/fault races. This issue was originally reported here [1] as a BUG triggered in page_try_dup_anon_rmap. Prior to the introduction of the hugetlb vma_lock, __unmap_hugepage_range_final cleared the VM_MAYSHARE flag to prevent pmd sharing. Subsequent faults on this vma were confused as VM_MAYSHARE indicates a sharable vma, but was not set so page_mapping was not set in new pages added to the page table. This resulted in pages that appeared anonymous in a VM_SHARED vma and triggered the BUG. Address issue by adding a new zap flag ZAP_FLAG_UNMAP to indicate an unmap call from unmap_vmas(). This is used to indicate the 'final' unmapping of a hugetlb vma. When called via MADV_DONTNEED, this flag is not set and the vm_lock is not deleted. [1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6JudD0g@mail.gmail.com/ Link: https://lkml.kernel.org/r/20221114235507.294320-3-mike.kravetz@oracle.com Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings") Signed-off-by: Mike Kravetz Reported-by: Wei Chen Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Mina Almasry Cc: Nadav Amit Cc: Naoya Horiguchi Cc: Peter Xu Cc: Rik van Riel Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ mm/hugetlb.c | 27 ++++++++++++++++----------- mm/memory.c | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cbfb489d381c..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1868,6 +1868,8 @@ struct zap_details { * default, the flag is not set. */ #define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) #ifdef CONFIG_MMU extern bool can_do_mlock(void); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1385c3b6c96..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchrously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. + */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/memory.c b/mm/memory.c index 9bc5edc35725..8c8420934d60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1711,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; From f0a0ccda18d6fd826d7c7e7ad48a6ed61c20f8b4 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Sat, 19 Nov 2022 21:05:42 +0900 Subject: [PATCH 811/963] nilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry() Syzbot reported a null-ptr-deref bug: NILFS (loop0): segctord starting. Construction interval = 5 seconds, CP frequency < 30 seconds general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 1 PID: 3603 Comm: segctord Not tainted 6.1.0-rc2-syzkaller-00105-gb229b6ca5abb #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 RIP: 0010:nilfs_palloc_commit_free_entry+0xe5/0x6b0 fs/nilfs2/alloc.c:608 Code: 00 00 00 00 fc ff df 80 3c 02 00 0f 85 cd 05 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 73 08 49 8d 7e 10 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 26 05 00 00 49 8b 46 10 be a6 00 00 00 48 c7 c7 RSP: 0018:ffffc90003dff830 EFLAGS: 00010212 RAX: dffffc0000000000 RBX: ffff88802594e218 RCX: 000000000000000d RDX: 0000000000000002 RSI: 0000000000002000 RDI: 0000000000000010 RBP: ffff888071880222 R08: 0000000000000005 R09: 000000000000003f R10: 000000000000000d R11: 0000000000000000 R12: ffff888071880158 R13: ffff88802594e220 R14: 0000000000000000 R15: 0000000000000004 FS: 0000000000000000(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb1c08316a8 CR3: 0000000018560000 CR4: 0000000000350ee0 Call Trace: nilfs_dat_commit_free fs/nilfs2/dat.c:114 [inline] nilfs_dat_commit_end+0x464/0x5f0 fs/nilfs2/dat.c:193 nilfs_dat_commit_update+0x26/0x40 fs/nilfs2/dat.c:236 nilfs_btree_commit_update_v+0x87/0x4a0 fs/nilfs2/btree.c:1940 nilfs_btree_commit_propagate_v fs/nilfs2/btree.c:2016 [inline] nilfs_btree_propagate_v fs/nilfs2/btree.c:2046 [inline] nilfs_btree_propagate+0xa00/0xd60 fs/nilfs2/btree.c:2088 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_file_data+0x45/0xd0 fs/nilfs2/segment.c:568 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1018 nilfs_segctor_scan_file+0x3f4/0x6f0 fs/nilfs2/segment.c:1067 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1197 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1503 [inline] nilfs_segctor_do_construct+0x12fc/0x6af0 fs/nilfs2/segment.c:2045 nilfs_segctor_construct+0x8e3/0xb30 fs/nilfs2/segment.c:2379 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2487 [inline] nilfs_segctor_thread+0x3c3/0xf30 fs/nilfs2/segment.c:2570 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 ... If DAT metadata file is corrupted on disk, there is a case where req->pr_desc_bh is NULL and blocknr is 0 at nilfs_dat_commit_end() during a b-tree operation that cascadingly updates ancestor nodes of the b-tree, because nilfs_dat_commit_alloc() for a lower level block can initialize the blocknr on the same DAT entry between nilfs_dat_prepare_end() and nilfs_dat_commit_end(). If this happens, nilfs_dat_commit_end() calls nilfs_dat_commit_free() without valid buffer heads in req->pr_desc_bh and req->pr_bitmap_bh, and causes the NULL pointer dereference above in nilfs_palloc_commit_free_entry() function, which leads to a crash. Fix this by adding a NULL check on req->pr_desc_bh and req->pr_bitmap_bh before nilfs_palloc_commit_free_entry() in nilfs_dat_commit_free(). This also calls nilfs_error() in that case to notify that there is a fatal flaw in the filesystem metadata and prevent further operations. Link: https://lkml.kernel.org/r/00000000000097c20205ebaea3d6@google.com Link: https://lkml.kernel.org/r/20221114040441.1649940-1-zhangpeng362@huawei.com Link: https://lkml.kernel.org/r/20221119120542.17204-1-konishi.ryusuke@gmail.com Signed-off-by: ZhangPeng Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ebe05ee8e98f755f61d0@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/dat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } From a435874bf626f55d7147026b059008c8de89fbb8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 19 Nov 2022 10:36:59 +0800 Subject: [PATCH 812/963] tools/vm/slabinfo-gnuplot: use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this up by moving the related file to use "grep -E" instead. sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/vm` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Link: https://lkml.kernel.org/r/1668825419-30584-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Sergey Senozhatsky Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- tools/vm/slabinfo-gnuplot.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" From 95bc35f9bee5220dad4e8567654ab3288a181639 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 22 Nov 2022 19:48:31 +0000 Subject: [PATCH 813/963] mm/damon/sysfs: fix wrong empty schemes assumption under online tuning in damon_sysfs_set_schemes() Commit da87878010e5 ("mm/damon/sysfs: support online inputs update") made 'damon_sysfs_set_schemes()' to be called for running DAMON context, which could have schemes. In the case, DAMON sysfs interface is supposed to update, remove, or add schemes to reflect the sysfs files. However, the code is assuming the DAMON context wouldn't have schemes at all, and therefore creates and adds new schemes. As a result, the code doesn't work as intended for online schemes tuning and could have more than expected memory footprint. The schemes are all in the DAMON context, so it doesn't leak the memory, though. Remove the wrong asssumption (the DAMON context wouldn't have schemes) in 'damon_sysfs_set_schemes()' to fix the bug. Link: https://lkml.kernel.org/r/20221122194831.3472-1-sj@kernel.org Fixes: da87878010e5 ("mm/damon/sysfs: support online inputs update") Signed-off-by: SeongJae Park Cc: [5.19+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 5ce403378c20..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; - for (i = 0; i < sysfs_schemes->nr; i++) { + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } + + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); From 6617da8fb565445e0be4a4885443006374943d09 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 30 Nov 2022 14:49:41 -0800 Subject: [PATCH 814/963] mm: add dummy pmd_young() for architectures not having it In order to avoid #ifdeffery add a dummy pmd_young() implementation as a fallback. This is required for the later patch "mm: introduce arch_has_hw_nonleaf_pmd_young()". Link: https://lkml.kernel.org/r/fd3ac3cd-7349-6bbd-890a-71a9454ca0b3@suse.com Signed-off-by: Juergen Gross Acked-by: Yu Zhao Cc: Borislav Petkov Cc: Dave Hansen Cc: Geert Uytterhoeven Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Sander Eikelenboom Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/loongarch/include/asm/pgtable.h | 1 + arch/mips/include/asm/pgtable.h | 1 + arch/riscv/include/asm/pgtable.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/sparc/include/asm/pgtable_64.h | 1 + arch/x86/include/asm/pgtable.h | 1 + include/linux/pgtable.h | 7 +++++++ 7 files changed, 13 insertions(+) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 946704bee599..10e0bd9009e2 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -482,6 +482,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..1d55af8d82b9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..6b0d59269b33 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, From 4aaf269c768dbacd6268af73fda2ffccaa3f1d88 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 23 Nov 2022 07:45:10 +0100 Subject: [PATCH 815/963] mm: introduce arch_has_hw_nonleaf_pmd_young() When running as a Xen PV guests commit eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") can cause a protection violation in pmdp_test_and_clear_young(): BUG: unable to handle page fault for address: ffff8880083374d0 #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation PGD 3026067 P4D 3026067 PUD 3027067 PMD 7fee5067 PTE 8010000008337065 Oops: 0003 [#1] PREEMPT SMP NOPTI CPU: 7 PID: 158 Comm: kswapd0 Not tainted 6.1.0-rc5-20221118-doflr+ #1 RIP: e030:pmdp_test_and_clear_young+0x25/0x40 This happens because the Xen hypervisor can't emulate direct writes to page table entries other than PTEs. This can easily be fixed by introducing arch_has_hw_nonleaf_pmd_young() similar to arch_has_hw_pte_young() and test that instead of CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG. Link: https://lkml.kernel.org/r/20221123064510.16225-1-jgross@suse.com Fixes: eed9a328aa1a ("mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG") Signed-off-by: Juergen Gross Reported-by: Sander Eikelenboom Acked-by: Yu Zhao Tested-by: Sander Eikelenboom Acked-by: David Hildenbrand [core changes] Signed-off-by: Andrew Morton --- arch/x86/include/asm/pgtable.h | 8 ++++++++ include/linux/pgtable.h | 11 +++++++++++ mm/vmscan.c | 10 +++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1d55af8d82b9..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1439,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 6b0d59269b33..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -267,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/mm/vmscan.c b/mm/vmscan.c index 026199c047e0..8fcc5fa768c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4085,14 +4085,14 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); From 829ae0f81ce093d674ff2256f66a714753e9ce32 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 24 Nov 2022 17:55:23 +0800 Subject: [PATCH 816/963] mm: migrate: fix THP's mapcount on isolation The issue is reported when removing memory through virtio_mem device. The transparent huge page, experienced copy-on-write fault, is wrongly regarded as pinned. The transparent huge page is escaped from being isolated in isolate_migratepages_block(). The transparent huge page can't be migrated and the corresponding memory block can't be put into offline state. Fix it by replacing page_mapcount() with total_mapcount(). With this, the transparent huge page can be isolated and migrated, and the memory block can be put into offline state. Besides, The page's refcount is increased a bit earlier to avoid the page is released when the check is executed. Link: https://lkml.kernel.org/r/20221124095523.31061-1-gshan@redhat.com Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations") Signed-off-by: Gavin Shan Reported-by: Zhenyu Zhang Tested-by: Zhenyu Zhang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Alistair Popple Cc: Hugh Dickins Cc: Kirill A. Shutemov Cc: Matthew Wilcox Cc: William Kucharski Cc: Zi Yan Cc: [5.7+] Signed-off-by: Andrew Morton --- mm/compaction.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -984,22 +984,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, goto isolate_fail; } - /* - * Migration will fail if an anonymous page is pinned in memory, - * so avoid taking lru_lock and isolating it unnecessarily in an - * admittedly racy check. - */ - mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; - - /* - * Only allow to migrate anonymous pages in GFP_NOFS context - * because those do not depend on fs locks. - */ - if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - /* * Be careful not to clear PageLRU until after we're * sure the page is not being freed elsewhere -- the @@ -1008,6 +992,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (unlikely(!get_page_unless_zero(page))) goto isolate_fail; + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ + mapping = page_mapping(page); + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; + + /* + * Only allow to migrate anonymous pages in GFP_NOFS context + * because those do not depend on fs locks. + */ + if (!(cc->gfp_mask & __GFP_FS) && mapping) + goto isolate_fail_put; + /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) goto isolate_fail_put; From 8d3c106e19e8d251da31ff4cc7462e4565d65084 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Nov 2022 22:37:12 +0100 Subject: [PATCH 817/963] mm/khugepaged: take the right locks for page table retraction pagetable walks on address ranges mapped by VMAs can be done under the mmap lock, the lock of an anon_vma attached to the VMA, or the lock of the VMA's address_space. Only one of these needs to be held, and it does not need to be held in exclusive mode. Under those circumstances, the rules for concurrent access to page table entries are: - Terminal page table entries (entries that don't point to another page table) can be arbitrarily changed under the page table lock, with the exception that they always need to be consistent for hardware page table walks and lockless_pages_from_mm(). This includes that they can be changed into non-terminal entries. - Non-terminal page table entries (which point to another page table) can not be modified; readers are allowed to READ_ONCE() an entry, verify that it is non-terminal, and then assume that its value will stay as-is. Retracting a page table involves modifying a non-terminal entry, so page-table-level locks are insufficient to protect against concurrent page table traversal; it requires taking all the higher-level locks under which it is possible to start a page walk in the relevant range in exclusive mode. The collapse_huge_page() path for anonymous THP already follows this rule, but the shmem/file THP path was getting it wrong, making it possible for concurrent rmap-based operations to cause corruption. Link: https://lkml.kernel.org/r/20221129154730.2274278-1-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-1-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-1-jannh@google.com Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP") Signed-off-by: Jann Horn Reviewed-by: Yang Shi Acked-by: David Hildenbrand Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 55 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a8d5ef2a77d2..0a11e132ad6b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1379,16 +1379,37 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1439,6 +1460,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1472,6 +1501,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). + */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1526,6 +1569,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1539,6 +1584,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1595,7 +1641,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; From 2ba99c5e08812494bc57f319fb562f527d9bacd8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Nov 2022 22:37:13 +0100 Subject: [PATCH 818/963] mm/khugepaged: fix GUP-fast interaction by sending IPI Since commit 70cbc3cc78a99 ("mm: gup: fix the fast GUP race against THP collapse"), the lockless_pages_from_mm() fastpath rechecks the pmd_t to ensure that the page table was not removed by khugepaged in between. However, lockless_pages_from_mm() still requires that the page table is not concurrently freed. Fix it by sending IPIs (if the architecture uses semi-RCU-style page table freeing) before freeing/reusing page tables. Link: https://lkml.kernel.org/r/20221129154730.2274278-2-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-2-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-2-jannh@google.com Fixes: ba76149f47d8 ("thp: khugepaged") Signed-off-by: Jann Horn Reviewed-by: Yang Shi Acked-by: David Hildenbrand Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- include/asm-generic/tlb.h | 4 ++++ mm/khugepaged.c | 2 ++ mm/mmu_gather.c | 4 +--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 0a11e132ad6b..294cb75d9c22 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1410,6 +1411,7 @@ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *v lockdep_assert_held_write(&vma->anon_vma->root->rwsem); pmd = pmdp_collapse_flush(vma, addr, pmdp); + tlb_remove_table_sync_one(); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); From f268f6cf875f3220afc77bdd0bf1bb136eb54db9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 25 Nov 2022 22:37:14 +0100 Subject: [PATCH 819/963] mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths Any codepath that zaps page table entries must invoke MMU notifiers to ensure that secondary MMUs (like KVM) don't keep accessing pages which aren't mapped anymore. Secondary MMUs don't hold their own references to pages that are mirrored over, so failing to notify them can lead to page use-after-free. I'm marking this as addressing an issue introduced in commit f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages"), but most of the security impact of this only came in commit 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP"), which actually omitted flushes for the removal of present PTEs, not just for the removal of empty page tables. Link: https://lkml.kernel.org/r/20221129154730.2274278-3-jannh@google.com Link: https://lkml.kernel.org/r/20221128180252.1684965-3-jannh@google.com Link: https://lkml.kernel.org/r/20221125213714.4115729-3-jannh@google.com Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") Signed-off-by: Jann Horn Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: John Hubbard Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/khugepaged.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 294cb75d9c22..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1399,6 +1399,7 @@ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *v unsigned long addr, pmd_t *pmdp) { pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); if (vma->vm_file) @@ -1410,8 +1411,12 @@ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *v if (vma->anon_vma) lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); From 6f6cb1714365a07dbc66851879538df9f6969288 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:49 +0000 Subject: [PATCH 820/963] drm/amdgpu: temporarily disable broken Clang builds due to blown stack-frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Fix a bunch of allmodconfig errors", v2. Since b339ec9c229aa ("kbuild: Only default to -Werror if COMPILE_TEST") WERROR now defaults to COMPILE_TEST meaning that it's enabled for allmodconfig builds. This leads to some interesting build failures when using Clang, each resolved in this set. With this set applied, I am able to obtain a successful allmodconfig Arm build. This patch (of 2): calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) architectures built with Clang (all released versions), whereby the stack frame gets blown up to well over 5k. This would cause an immediate kernel panic on most architectures. We'll revert this when the following bug report has been resolved: https://github.com/llvm/llvm-project/issues/41896. Link: https://lkml.kernel.org/r/20221125120750.3537134-1-lee@kernel.org Link: https://lkml.kernel.org/r/20221125120750.3537134-2-lee@kernel.org Signed-off-by: Lee Jones Suggested-by: Arnd Bergmann Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Lee Jones Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton --- drivers/gpu/drm/amd/display/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. + config DRM_AMD_DC_DCN def_bool n help From 152fe65f300e1819d59b80477d3e0999b4d5d7d2 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 25 Nov 2022 12:07:50 +0000 Subject: [PATCH 821/963] Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When enabled, KASAN enlarges function's stack-frames. Pushing quite a few over the current threshold. This can mainly be seen on 32-bit architectures where the present limit (when !GCC) is a lowly 1024-Bytes. Link: https://lkml.kernel.org/r/20221125120750.3537134-3-lee@kernel.org Signed-off-by: Lee Jones Acked-by: Arnd Bergmann Cc: Alex Deucher Cc: "Christian König" Cc: Daniel Vetter Cc: David Airlie Cc: Harry Wentland Cc: Leo Li Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: "Pan, Xinhui" Cc: Rodrigo Siqueira Cc: Thomas Zimmermann Cc: Tom Rix Cc: Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1005415f0f4..580e453e284e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help From 1d351f1894342c378b96bb9ed89f8debb1e24e9f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 28 Nov 2022 13:21:40 -0800 Subject: [PATCH 822/963] revert "kbuild: fix -Wimplicit-function-declaration in license_is_gpl_compatible" It causes build failures with unusual CC/HOSTCC combinations. Quoting https://lkml.kernel.org/r/A222B1E6-69B8-4085-AD1B-27BDB72CA971@goldelico.com: HOSTCC scripts/mod/modpost.o - due to target missing In file included from include/linux/string.h:5, from scripts/mod/../../include/linux/license.h:5, from scripts/mod/modpost.c:24: include/linux/compiler.h:246:10: fatal error: asm/rwonce.h: No such file or directory 246 | #include | ^~~~~~~~~~~~~~ compilation terminated. ... The problem is that HOSTCC is not necessarily the same compiler or even architecture as CC and pulling in or files indirectly isn't a good idea then. My toolchain is providing HOSTCC = gcc (MacPorts) and CC = arm-linux-gnueabihf (built from gcc source) and all running on Darwin. If I change the include to I can then "HOSTCC scripts/mod/modpost.c" but then it fails for "CC kernel/module/main.c" not finding : CC kernel/module/main.o - due to target missing In file included from kernel/module/main.c:43:0: ./include/linux/license.h:5:20: fatal error: string.h: No such file or directory #include ^ compilation terminated. Reported-by: "H. Nikolaus Schaller" Cc: Sam James Signed-off-by: Andrew Morton --- include/linux/license.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/license.h b/include/linux/license.h index ad937f57f2cb..7cce390f120b 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,8 +2,6 @@ #ifndef __LICENSE_H #define __LICENSE_H -#include - static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 From 421f8663b3a775c32f724f793264097c60028f2e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 25 Nov 2022 19:50:03 +0800 Subject: [PATCH 823/963] net: broadcom: Add PTP_1588_CLOCK_OPTIONAL dependency for BCMGENET under ARCH_BCM2835 commit 8d820bc9d12b ("net: broadcom: Fix BCMGENET Kconfig") fixes the build that contain 99addbe31f55 ("net: broadcom: Select BROADCOM_PHY for BCMGENET") and enable BCMGENET=y but PTP_1588_CLOCK_OPTIONAL=m, which otherwise leads to a link failure. However this may trigger a runtime failure. Fix the original issue by propagating the PTP_1588_CLOCK_OPTIONAL dependency of BROADCOM_PHY down to BCMGENET. Fixes: 8d820bc9d12b ("net: broadcom: Fix BCMGENET Kconfig") Fixes: 99addbe31f55 ("net: broadcom: Select BROADCOM_PHY for BCMGENET") Reported-by: Naresh Kamboju Suggested-by: Arnd Bergmann Signed-off-by: YueHaibing Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20221125115003.30308-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 55dfdb34e37b..f4ca0c6c0f51 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -71,13 +71,14 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835 select MII select PHYLIB select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) + select BROADCOM_PHY if ARCH_BCM2835 help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. From 6c681f899e0360803b924ac8c96ee21965118649 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Tue, 29 Nov 2022 10:36:39 +0530 Subject: [PATCH 824/963] net: ethernet: ti: am65-cpsw: Fix RGMII configuration at SPEED_10 The am65-cpsw driver supports configuring all RGMII variants at interface speed of 10 Mbps. However, in the process of shifting to the PHYLINK framework, the support for all variants of RGMII except the PHY_INTERFACE_MODE_RGMII variant was accidentally removed. Fix this by using phy_interface_mode_is_rgmii() to check for all variants of RGMII mode. Fixes: e8609e69470f ("net: ethernet: ti: am65-cpsw: Convert to PHYLINK") Reported-by: Schuyler Patton Signed-off-by: Siddharth Vadapalli Link: https://lore.kernel.org/r/20221129050639.111142-1-s-vadapalli@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index d04a239c658e..b3b0ba842541 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1454,7 +1454,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy if (speed == SPEED_1000) mac_control |= CPSW_SL_CTL_GIG; - if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII) + if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface)) /* Can be used with in band mode only */ mac_control |= CPSW_SL_CTL_EXT_EN; if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII) From d3d6b1bf85aefe0ebc0624574b3bb62f0693914c Mon Sep 17 00:00:00 2001 From: Guillaume BRUN Date: Wed, 16 Nov 2022 15:35:23 +0100 Subject: [PATCH 825/963] drm: bridge: dw_hdmi: fix preference of RGB modes over YUV420 Cheap monitors sometimes advertise YUV modes they don't really have (HDMI specification mandates YUV support so even monitors without actual support will often wrongfully advertise it) which results in YUV matches and user forum complaints of a red tint to light colour display areas in common desktop environments. Moving the default RGB fall-back before YUV selection results in RGB mode matching in most cases, reducing complaints. Fixes: 6c3c719936da ("drm/bridge: synopsys: dw-hdmi: add bus format negociation") Signed-off-by: Guillaume BRUN Tested-by: Christian Hewitt Reviewed-by: Robert Foss Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20221116143523.2126-1-the.cheaterman@gmail.com --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 40d8ca37f5bc..aa51c61a78c7 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2720,6 +2720,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, * if supported. In any case the default RGB888 format is added */ + /* Default 8bit RGB fallback */ + output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; + if (max_bpc >= 16 && info->bpc == 16) { if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48; @@ -2753,9 +2756,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444) output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24; - /* Default 8bit RGB fallback */ - output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24; - *num_output_fmts = i; return output_fmts; From b3abe42e94900bdd045c472f9c9be620ba5ce553 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 27 Nov 2022 10:24:11 +0900 Subject: [PATCH 826/963] af_unix: Get user_ns from in_skb in unix_diag_get_exact(). Wei Chen reported a NULL deref in sk_user_ns() [0][1], and Paolo diagnosed the root cause: in unix_diag_get_exact(), the newly allocated skb does not have sk. [2] We must get the user_ns from the NETLINK_CB(in_skb).sk and pass it to sk_diag_fill(). [0]: BUG: kernel NULL pointer dereference, address: 0000000000000270 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 12bbce067 P4D 12bbce067 PUD 12bc40067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 27942 Comm: syz-executor.0 Not tainted 6.1.0-rc5-next-20221118 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_user_ns include/net/sock.h:920 [inline] RIP: 0010:sk_diag_dump_uid net/unix/diag.c:119 [inline] RIP: 0010:sk_diag_fill+0x77d/0x890 net/unix/diag.c:170 Code: 89 ef e8 66 d4 2d fd c7 44 24 40 00 00 00 00 49 8d 7c 24 18 e8 54 d7 2d fd 49 8b 5c 24 18 48 8d bb 70 02 00 00 e8 43 d7 2d fd <48> 8b 9b 70 02 00 00 48 8d 7b 10 e8 33 d7 2d fd 48 8b 5b 10 48 8d RSP: 0018:ffffc90000d67968 EFLAGS: 00010246 RAX: ffff88812badaa48 RBX: 0000000000000000 RCX: ffffffff840d481d RDX: 0000000000000465 RSI: 0000000000000000 RDI: 0000000000000270 RBP: ffffc90000d679a8 R08: 0000000000000277 R09: 0000000000000000 R10: 0001ffffffffffff R11: 0001c90000d679a8 R12: ffff88812ac03800 R13: ffff88812c87c400 R14: ffff88812ae42210 R15: ffff888103026940 FS: 00007f08b4e6f700(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000270 CR3: 000000012c58b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_diag_get_exact net/unix/diag.c:285 [inline] unix_diag_handler_dump+0x3f9/0x500 net/unix/diag.c:317 __sock_diag_cmd net/core/sock_diag.c:235 [inline] sock_diag_rcv_msg+0x237/0x250 net/core/sock_diag.c:266 netlink_rcv_skb+0x13e/0x250 net/netlink/af_netlink.c:2564 sock_diag_rcv+0x24/0x40 net/core/sock_diag.c:277 netlink_unicast_kernel net/netlink/af_netlink.c:1330 [inline] netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1356 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1932 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2476 ___sys_sendmsg net/socket.c:2530 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2559 __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x4697f9 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f08b4e6ec48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000077bf80 RCX: 00000000004697f9 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 00000000004d29e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000077bf80 R13: 0000000000000000 R14: 000000000077bf80 R15: 00007ffdb36bc6c0 Modules linked in: CR2: 0000000000000270 [1]: https://lore.kernel.org/netdev/CAO4mrfdvyjFpokhNsiwZiP-wpdSD0AStcJwfKcKQdAALQ9_2Qw@mail.gmail.com/ [2]: https://lore.kernel.org/netdev/e04315e7c90d9a75613f3993c2baf2d344eef7eb.camel@redhat.com/ Fixes: cae9910e7344 ("net: Add UNIX_DIAG_UID to Netlink UNIX socket diagnostics.") Reported-by: syzbot Reported-by: Wei Chen Diagnosed-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/diag.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index 105f522a89fe..616b55c5b890 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } -static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) +static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, + struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; @@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && - sk_diag_dump_uid(sk, skb)) + sk_diag_dump_uid(sk, skb, user_ns)) goto out_nlmsg_trim; nlmsg_end(skb, nlh); @@ -179,7 +181,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -190,7 +193,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) goto next; - if (sk_diag_dump(sk, skb, req, + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { @@ -282,7 +285,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, + err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); From ac011361bd4f6206b36d2a242500239881bf8d2a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 27 Nov 2022 10:24:12 +0900 Subject: [PATCH 827/963] af_unix: Add test for sock_diag and UDIAG_SHOW_UID. The test prog dumps a single AF_UNIX socket's UID with and without unshare(CLONE_NEWUSER) and checks if it matches the result of getuid(). Without the preceding patch, the test prog is killed by a NULL deref in sk_diag_dump_uid(). # ./diag_uid TAP version 13 1..2 # Starting 2 tests from 3 test cases. # RUN diag_uid.uid.1 ... BUG: kernel NULL pointer dereference, address: 0000000000000270 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 105212067 P4D 105212067 PUD 1051fe067 PMD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.amzn2022.0.1 04/01/2014 RIP: 0010:sk_diag_fill (./include/net/sock.h:920 net/unix/diag.c:119 net/unix/diag.c:170) ... # 1: Test terminated unexpectedly by signal 9 # FAIL diag_uid.uid.1 not ok 1 diag_uid.uid.1 # RUN diag_uid.uid_unshare.1 ... # 1: Test terminated by timeout # FAIL diag_uid.uid_unshare.1 not ok 2 diag_uid.uid_unshare.1 # FAILED: 0 / 2 tests passed. # Totals: pass:0 fail:2 xfail:0 xpass:0 skip:0 error:0 With the patch, the test succeeds. # ./diag_uid TAP version 13 1..2 # Starting 2 tests from 3 test cases. # RUN diag_uid.uid.1 ... # OK diag_uid.uid.1 ok 1 diag_uid.uid.1 # RUN diag_uid.uid_unshare.1 ... # OK diag_uid.uid_unshare.1 ok 2 diag_uid.uid_unshare.1 # PASSED: 2 / 2 tests passed. # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/af_unix/Makefile | 2 +- .../testing/selftests/net/af_unix/diag_uid.c | 178 ++++++++++++++++++ 3 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/af_unix/diag_uid.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 3d7adee7a3e6..ff8fe93f679c 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash cmsg_sender +diag_uid fin_ack_lat gro hwtstamp_config diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 969620ae9928..1e4b397cece6 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob unix_connect +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..5b88f7129fea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct unix_diag_req *udr; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, self); +} + +TEST_HARNESS_MAIN From dd30dcfa7a74a06f8dcdab260d8d5adf32f17333 Mon Sep 17 00:00:00 2001 From: Wenchao Chen Date: Wed, 30 Nov 2022 20:13:28 +0800 Subject: [PATCH 828/963] mmc: sdhci-sprd: Fix no reset data and command after voltage switch After switching the voltage, no reset data and command will cause CMD2 timeout. Fixes: 29ca763fc26f ("mmc: sdhci-sprd: Add pin control support for voltage switch") Signed-off-by: Wenchao Chen Acked-by: Adrian Hunter Reviewed-by: Baolin Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221130121328.25553-1-wenchao.chen@unisoc.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-sprd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; From 8a59b36f143e7db0bdc15d98f60e00cd29a09d97 Mon Sep 17 00:00:00 2001 From: Ulises Mendez Martinez Date: Wed, 30 Nov 2022 23:09:50 +0000 Subject: [PATCH 829/963] ANDROID: allmodconfig: Disable RANDSTRUCT This is a workaround due a double file generation for this config[0]. [0]: generated/randstruct_hash.h Bug: 260313194 Change-Id: Ib6b93a7574328c39eab1ef4a754e1dc752ed6825 Signed-off-by: Ulises Mendez Martinez Signed-off-by: Matthias Maennich --- build.config.allmodconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build.config.allmodconfig b/build.config.allmodconfig index 1301e950d777..8bd0d5f9574b 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -7,6 +7,9 @@ function update_config() { -d WERROR \ -d SAMPLES \ -d BPFILTER \ + -e RANDSTRUCT_NONE \ + -d RANDSTRUCT_FULL \ + -d RANDSTRUCT \ (cd ${OUT_DIR} && \ make O=${OUT_DIR} $archsubarch CROSS_COMPILE=${CROSS_COMPILE} ${TOOL_ARGS} ${MAKE_ARGS} olddefconfig) From d9f15a9de44affe733e34f93bc184945ba277e6d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 22 Nov 2022 12:16:21 +0000 Subject: [PATCH 830/963] Revert "clocksource/drivers/riscv: Events are stopped during CPU suspend" This reverts commit 232ccac1bd9b5bfe73895f527c08623e7fa0752d. On the subject of suspend, the RISC-V SBI spec states: This does not cover whether any given events actually reach the hart or not, just what the hart will do if it receives an event. On PolarFire SoC, and potentially other SiFive based implementations, events from the RISC-V timer do reach a hart during suspend. This is not the case for the implementation on the Allwinner D1 - there timer events are not received during suspend. To fix this, the CLOCK_EVT_FEAT_C3STOP (mis)feature was enabled for the timer driver - but this has broken both RCU stall detection and timers generally on PolarFire SoC and potentially other SiFive based implementations. If an AXI read to the PCIe controller on PolarFire SoC times out, the system will stall, however, with CLOCK_EVT_FEAT_C3STOP active, the system just locks up without RCU stalling: io scheduler mq-deadline registered io scheduler kyber registered microchip-pcie 2000000000.pcie: host bridge /soc/pcie@2000000000 ranges: microchip-pcie 2000000000.pcie: MEM 0x2008000000..0x2087ffffff -> 0x0008000000 microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: axi read request error microchip-pcie 2000000000.pcie: axi read timeout microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer microchip-pcie 2000000000.pcie: sec error in pcie2axi buffer microchip-pcie 2000000000.pcie: ded error in pcie2axi buffer Freeing initrd memory: 7332K Similarly issues were reported with clock_nanosleep() - with a test app that sleeps each cpu for 6, 5, 4, 3 ms respectively, HZ=250 & the blamed commit in place, the sleep times are rounded up to the next jiffy: == CPU: 1 == == CPU: 2 == == CPU: 3 == == CPU: 4 == Mean: 7.974992 Mean: 7.976534 Mean: 7.962591 Mean: 3.952179 Std Dev: 0.154374 Std Dev: 0.156082 Std Dev: 0.171018 Std Dev: 0.076193 Hi: 9.472000 Hi: 10.495000 Hi: 8.864000 Hi: 4.736000 Lo: 6.087000 Lo: 6.380000 Lo: 4.872000 Lo: 3.403000 Samples: 521 Samples: 521 Samples: 521 Samples: 521 Fortunately, the D1 has a second timer, which is "currently used in preference to the RISC-V/SBI timer driver" so a revert here does not hurt operation of D1 in its current form. Ultimately, a DeviceTree property (or node) will be added to encode the behaviour of the timers, but until then revert the addition of CLOCK_EVT_FEAT_C3STOP. Fixes: 232ccac1bd9b ("clocksource/drivers/riscv: Events are stopped during CPU suspend") Signed-off-by: Conor Dooley Signed-off-by: Thomas Gleixner Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Acked-by: Samuel Holland Link: https://lore.kernel.org/linux-riscv/YzYTNQRxLr7Q9JR0@spud/ Link: https://github.com/riscv-non-isa/riscv-sbi-doc/issues/98/ Link: https://lore.kernel.org/linux-riscv/bf6d3b1f-f703-4a25-833e-972a44a04114@sholland.org/ Link: https://lore.kernel.org/r/20221122121620.3522431-1-conor.dooley@microchip.com --- drivers/clocksource/timer-riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 969a552da8d2..a0d66fabf073 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; From c25b7a7a565e5eeb2459b37583eea67942057511 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Nov 2022 15:06:44 +0100 Subject: [PATCH 831/963] inet: ping: use hlist_nulls rcu iterator during lookup ping_lookup() does not acquire the table spinlock, so iteration should use hlist_nulls_for_each_entry_rcu(). Spotted during code review. Fixes: dbca1596bbb0 ("ping: convert to RCU lookups, get rid of rwlock") Cc: Eric Dumazet Signed-off-by: Florian Westphal Link: https://lore.kernel.org/r/20221129140644.28525-1-fw@strlen.de Signed-off-by: Paolo Abeni --- .clang-format | 1 + include/net/ping.h | 3 --- net/ipv4/ping.c | 7 ++++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index 1247d54f9e49..8d01225bfcb7 100644 --- a/.clang-format +++ b/.clang-format @@ -535,6 +535,7 @@ ForEachMacros: - 'perf_hpp_list__for_each_sort_list_safe' - 'perf_pmu__for_each_hybrid_pmu' - 'ping_portaddr_for_each_entry' + - 'ping_portaddr_for_each_entry_rcu' - 'plist_for_each' - 'plist_for_each_continue' - 'plist_for_each_entry' diff --git a/include/net/ping.h b/include/net/ping.h index e4ff3911cbf5..9233ad3de0ad 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -16,9 +16,6 @@ #define PING_HTABLE_SIZE 64 #define PING_HTABLE_MASK (PING_HTABLE_SIZE-1) -#define ping_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) - /* * gid_t is either uint or ushort. We want to pass it to * proc_dointvec_minmax(), so it must not be larger than MAX_INT diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bde333b24837..04b4ec07bb06 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -49,6 +49,11 @@ #include #endif +#define ping_portaddr_for_each_entry(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) + struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; spinlock_t lock; @@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - ping_portaddr_for_each_entry(sk, hnode, hslot) { + ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) { isk = inet_sk(sk); pr_debug("iterate\n"); From 46bb86f7e9ea72d36b77b53ea567334596dea865 Mon Sep 17 00:00:00 2001 From: Ulises Mendez Martinez Date: Wed, 30 Nov 2022 19:49:10 +0000 Subject: [PATCH 832/963] ANDROID: GKI: Fix linter warning * Warning solved: The file has no module docstring. * Add license header. Bug: 252888476 Bug: 252888476 Change-Id: Ic0d98a8ef04766b3bca5c3ca6d17daf9dc74346f Signed-off-by: Ulises Mendez Martinez --- modules.bzl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/modules.bzl b/modules.bzl index bd436f3f93bc..6473dcb49384 100644 --- a/modules.bzl +++ b/modules.bzl @@ -1,3 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2022 The Android Open Source Project + +""" +This module contains a full list of kernel modules + compiled by GKI. +""" + COMMON_GKI_MODULES_LIST = [ # keep sorted "drivers/block/zram/zram.ko", From 7572ac3c979d4d0fb42d73a72d2608656516ff4f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 30 Nov 2022 17:37:17 +0100 Subject: [PATCH 833/963] arm64: efi: Revert "Recover from synchronous exceptions ..." This reverts commit 23715a26c8d81291, which introduced some code in assembler that manipulates both the ordinary and the shadow call stack pointer in a way that could potentially be taken advantage of. So let's revert it, and do a better job the next time around. Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 8 -------- arch/arm64/kernel/efi-rt-wrapper.S | 33 ++---------------------------- arch/arm64/kernel/efi.c | 26 ----------------------- arch/arm64/mm/fault.c | 4 ---- 4 files changed, 2 insertions(+), 69 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -16,20 +16,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] - /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ee53f2a0aa03..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,6 @@ #include #include -#include #include @@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9cf9826417..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } From a89ff5f5cc64b9fe7a992cf56988fd36f56ca82a Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Thu, 17 Nov 2022 11:23:13 -0500 Subject: [PATCH 834/963] hwmon: (coretemp) Check for null before removing sysfs attrs If coretemp_add_core() gets an error then pdata->core_data[indx] is already NULL and has been kfreed. Don't pass that to sysfs_remove_group() as that will crash in sysfs_remove_group(). [Shortened for readability] [91854.020159] sysfs: cannot create duplicate filename '/devices/platform/coretemp.0/hwmon/hwmon2/temp20_label' [91855.126115] BUG: kernel NULL pointer dereference, address: 0000000000000188 [91855.165103] #PF: supervisor read access in kernel mode [91855.194506] #PF: error_code(0x0000) - not-present page [91855.224445] PGD 0 P4D 0 [91855.238508] Oops: 0000 [#1] PREEMPT SMP PTI ... [91855.342716] RIP: 0010:sysfs_remove_group+0xc/0x80 ... [91855.796571] Call Trace: [91855.810524] coretemp_cpu_offline+0x12b/0x1dd [coretemp] [91855.841738] ? coretemp_cpu_online+0x180/0x180 [coretemp] [91855.871107] cpuhp_invoke_callback+0x105/0x4b0 [91855.893432] cpuhp_thread_fun+0x8e/0x150 ... Fix this by checking for NULL first. Signed-off-by: Phil Auld Cc: linux-hwmon@vger.kernel.org Cc: Fenghua Yu Cc: Jean Delvare Cc: Guenter Roeck Link: https://lore.kernel.org/r/20221117162313.3164803-1-pauld@redhat.com Fixes: 199e0de7f5df3 ("hwmon: (coretemp) Merge pkgtemp with coretemp") Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..30a19d711f89 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -533,6 +533,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); From 7dec14537c5906b8bf40fd6fd6d9c3850f8df11d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 18 Nov 2022 17:33:03 +0800 Subject: [PATCH 835/963] hwmon: (coretemp) fix pci device refcount leak in nv1a_ram_new() As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). So call it after using to avoid refcount leak. Fixes: 14513ee696a0 ("hwmon: (coretemp) Use PCI host bridge ID to identify CPU if necessary") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221118093303.214163-1-yangyingliang@huawei.com Signed-off-by: Guenter Roeck --- drivers/hwmon/coretemp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 30a19d711f89..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) From 9bdc112be727cf1ba65be79541147f960c3349d8 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Fri, 25 Nov 2022 01:43:29 +0000 Subject: [PATCH 836/963] hwmon: (asus-ec-sensors) Add checks for devm_kcalloc As the devm_kcalloc may return NULL, the return value needs to be checked to avoid NULL poineter dereference. Fixes: d0ddfd241e57 ("hwmon: (asus-ec-sensors) add driver for ASUS EC") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221125014329.121560-1-yuancan@huawei.com Signed-off-by: Guenter Roeck --- drivers/hwmon/asus-ec-sensors.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { From 9a8cc8cabc1e351614fd7f9e774757a5143b6fe8 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Tue, 29 Nov 2022 18:53:18 -0500 Subject: [PATCH 837/963] drm/amdgpu: enable Vangogh VCN indirect sram mode So that uses PSP to initialize HW. Fixes: 0c2c02b66c672e ("drm/amdgpu/vcn: add firmware support for dimgrey_cavefish") Signed-off-by: Leo Liu Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; From a4412fdd49dc011bcc2c0d81ac4cab7457092650 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 21 Nov 2022 10:44:03 -0500 Subject: [PATCH 838/963] error-injection: Add prompt for function error injection The config to be able to inject error codes into any function annotated with ALLOW_ERROR_INJECTION() is enabled when FUNCTION_ERROR_INJECTION is enabled. But unfortunately, this is always enabled on x86 when KPROBES is enabled, and there's no way to turn it off. As kprobes is useful for observability of the kernel, it is useful to have it enabled in production environments. But error injection should be avoided. Add a prompt to the config to allow it to be disabled even when kprobes is enabled, and get rid of the "def_bool y". This is a kernel debug feature (it's in Kconfig.debug), and should have never been something enabled by default. Cc: stable@vger.kernel.org Fixes: 540adea3809f6 ("error-injection: Separate error-injection from kprobe") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1005415f0f4..38545c56bf69 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1874,8 +1874,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. + + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" From 8bfd4ec726945cec35ee5cafebc1c55b83d5a9fb Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Mon, 28 Nov 2022 10:51:58 +0000 Subject: [PATCH 839/963] i2c: cadence: Fix regression with bus recovery Commit "i2c: cadence: Add standard bus recovery support" breaks for i2c devices that have no pinctrl defined. There is no requirement for this to exist in the DT. This has worked perfectly well without this before in at least 1 real usage case on hardware (Mali Komeda DPU, Cadence i2c to talk to a tda99xx phy). Adding the requirement to have pinctrl set up in the device tree (or otherwise be found) is a regression where the whole i2c device is lost entirely (in this case dropping entire devices which then leads to the drm display stack unable to find the phy for display output, thus having no drm display device and so on down the chain). This converts the above commit to an enhancement if pinctrl can be found for the i2c device, providing a timeout on read with recovery, but if not, do what used to be done rather than a fatal loss of a device. This restores the mentioned display devices to their working state again. Fixes: 58b924241d0a ("i2c: cadence: Add standard bus recovery support") Signed-off-by: Carsten Haitzler Reviewed-by: Shubhrajyoti Datta Reviewed-by: Michael Grzeschik Acked-by: Michal Simek [wsa: added braces to else-branch] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cadence.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index fe0cd205502d..f58943cb1341 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US); if (ret) { ret = -EAGAIN; - i2c_recover_bus(adap); + if (id->adap.bus_recovery_info) + i2c_recover_bus(adap); goto out; } @@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev); if (IS_ERR(id->rinfo.pinctrl)) { + int err = PTR_ERR(id->rinfo.pinctrl); + dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(id->rinfo.pinctrl); + if (err != -ENODEV) + return err; + } else { + id->adap.bus_recovery_info = &id->rinfo; } id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem); @@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->adap.retries = 3; /* Default retry value. */ id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; - id->adap.bus_recovery_info = &id->rinfo; init_completion(&id->xfer_done); snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); From 7d8ccf4f117d082156e842d959f634efcf203cef Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Mon, 21 Nov 2022 18:17:52 +0800 Subject: [PATCH 840/963] i2c: qcom-geni: fix error return code in geni_i2c_gpi_xfer Fix to return a negative error code from the gi2c->err instead of 0. Fixes: d8703554f4de ("i2c: qcom-geni: Add support for GPI DMA") Signed-off-by: Wang Yufen Reviewed-by: Tommaso Merciai Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-qcom-geni.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..8fce98bb77ff 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n", gi2c->cur->flags, gi2c->cur->addr); gi2c->err = -ETIMEDOUT; - goto err; } if (gi2c->err) { From d36678f7905cbd1dc55a8a96e066dafd749d4600 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 10 Nov 2022 00:59:02 +0100 Subject: [PATCH 841/963] i2c: imx: Only DMA messages with I2C_M_DMA_SAFE flag set Recent changes to the DMA code has resulting in the IMX driver failing I2C transfers when the buffer has been vmalloc. Only perform DMA transfers if the message has the I2C_M_DMA_SAFE flag set, indicating the client is providing a buffer which is DMA safe. This is a minimal fix for stable. The I2C core provides helpers to allocate a bounce buffer. For a fuller fix the master should make use of these helpers. Fixes: 4544b9f25e70 ("dma-mapping: Add vmap checks to dma_map_single()") Signed-off-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..fc70920c4dda 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); From 40b8c2a1af03ba3e8da55a4490d646bfa845e71a Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 30 Nov 2022 00:21:46 -0800 Subject: [PATCH 842/963] vmxnet3: correctly report encapsulated LRO packet Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the pathc did not report correctly the encapsulated packet which is LRO'ed by the hypervisor. This patch fixes this issue by using correct callback for the LRO'ed encapsulated packet. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d3e7b27eb933..3111a8a6b26a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1396,6 +1396,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, }; u32 num_pkts = 0; bool skip_page_frags = false; + bool encap_lro = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; @@ -1556,13 +1557,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (VMXNET3_VERSION_GE_2(adapter) && rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { struct Vmxnet3_RxCompDescExt *rcdlro; + union Vmxnet3_GenericDesc *gdesc; + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; + gdesc = (union Vmxnet3_GenericDesc *)rcd; segCnt = rcdlro->segCnt; WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; + encap_lro = (le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); } else { segCnt = 0; } @@ -1630,7 +1636,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || + if ((!rcd->tcp && !encap_lro) || !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; @@ -1639,7 +1645,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, SKB_GSO_TCPV4 : SKB_GSO_TCPV6; skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; - } else if (segCnt != 0 || skb->len > mtu) { + } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { u32 hlen; hlen = vmxnet3_get_hdr_len(adapter, skb, @@ -1668,6 +1674,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + encap_lro = false; num_pkts++; } From 409e8ec8c5825591895937b8499b54aa2476fae7 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 30 Nov 2022 00:21:47 -0800 Subject: [PATCH 843/963] vmxnet3: use correct intrConf reference when using extended queues Commit 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues") added support for 32Tx/Rx queues. As a part of this patch, intrConf structure was extended to incorporate increased queues. This patch fixes the issue where incorrect reference is being used. Fixes: 39f9895a00f4 ("vmxnet3: add support for 32 Tx/Rx queues") Signed-off-by: Ronak Doshi Acked-by: Guolin Yang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3111a8a6b26a..6f1e560fb15c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_enable_intr(adapter, i); - adapter->shared->devRead.intrConf.intrCtrl &= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl &= cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl &= + cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } } @@ -85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) { int i; - adapter->shared->devRead.intrConf.intrCtrl |= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl |= + cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_disable_intr(adapter, i); } From e65a6897be5e4939d477c4969a05e12d90b08409 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Thu, 1 Dec 2022 12:01:24 +0800 Subject: [PATCH 844/963] iommu/vt-d: Add a fix for devices need extra dtlb flush QAT devices on Intel Sapphire Rapids and Emerald Rapids have a defect in address translation service (ATS). These devices may inadvertently issue ATS invalidation completion before posted writes initiated with translated address that utilized translations matching the invalidation address range, violating the invalidation completion ordering. This patch adds an extra device TLB invalidation for the affected devices, it is needed to ensure no more posted writes with translated address following the invalidation completion. Therefore, the ordering is preserved and data-corruption is prevented. Device TLBs are invalidated under the following six conditions: 1. Device driver does DMA API unmap IOVA 2. Device driver unbind a PASID from a process, sva_unbind_device() 3. PASID is torn down, after PASID cache is flushed. e.g. process exit_mmap() due to crash 4. Under SVA usage, called by mmu_notifier.invalidate_range() where VM has to free pages that were unmapped 5. userspace driver unmaps a DMA buffer 6. Cache invalidation in vSVA usage (upcoming) For #1 and #2, device drivers are responsible for stopping DMA traffic before unmap/unbind. For #3, iommu driver gets mmu_notifier to invalidate TLB the same way as normal user unmap which will do an extra invalidation. The dTLB invalidation after PASID cache flush does not need an extra invalidation. Therefore, we only need to deal with #4 and #5 in this patch. #1 is also covered by this patch due to common code path with #5. Tested-by: Yuzhang Luo Reviewed-by: Ashok Raj Reviewed-by: Kevin Tian Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20221130062449.1360063-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 69 +++++++++++++++++++++++++++++++++++-- drivers/iommu/intel/iommu.h | 4 +++ drivers/iommu/intel/svm.c | 5 ++- 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..587eebe39820 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. + */ +#define BUGGY_QAT_DEVID_MASK 0x494c +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -4490,9 +4509,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4931,3 +4951,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation. + */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..db9df7c3790c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -623,6 +623,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..fe615c53479c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, From 6927d352380797ddbee18631491ec428741696e2 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 1 Dec 2022 12:01:25 +0800 Subject: [PATCH 845/963] iommu/vt-d: Fix PCI device refcount leak in prq_event_thread() As comment of pci_get_domain_bus_and_slot() says, it returns a pci device with refcount increment, when finish using it, the caller must decrease the reference count by calling pci_dev_put(). So call pci_dev_put() after using the 'pdev' to avoid refcount leak. Besides, if the 'pdev' is null or intel_svm_prq_report() returns error, there is no need to trace this fault. Fixes: 06f4b8d09dba ("iommu/vt-d: Remove unnecessary SVA data accesses in page fault path") Suggested-by: Lu Baolu Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221119144028.2452731-1-yangyingliang@huawei.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index fe615c53479c..03b25358946c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -748,12 +748,16 @@ static irqreturn_t prq_event_thread(int irq, void *d) * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. */ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } From afca9e19cc720bfafc75dc5ce429c185ca93f31d Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 1 Dec 2022 12:01:26 +0800 Subject: [PATCH 846/963] iommu/vt-d: Fix PCI device refcount leak in has_external_pci() for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() before 'return true' to avoid reference count leak. Fixes: 89a6079df791 ("iommu/vt-d: Force IOMMU on for platform opt in hint") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221121113649.190393-2-wangxiongfeng2@huawei.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 587eebe39820..5287efe247b1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3873,8 +3873,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } From 4bedbbd782ebbe7287231fea862c158d4f08a9e3 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 1 Dec 2022 12:01:27 +0800 Subject: [PATCH 847/963] iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init() for_each_pci_dev() is implemented by pci_get_device(). The comment of pci_get_device() says that it will increase the reference count for the returned pci_dev and also decrease the reference count for the input pci_dev @from if it is not NULL. If we break for_each_pci_dev() loop with pdev not NULL, we need to call pci_dev_put() to decrease the reference count. Add the missing pci_dev_put() for the error path to avoid reference count leak. Fixes: 2e4552893038 ("iommu/vt-d: Unify the way to process DMAR device scope array") Signed-off-by: Xiongfeng Wang Link: https://lore.kernel.org/r/20221121113649.190393-3-wangxiongfeng2@huawei.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..bc94059a5b87 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); From bfdfddfecdec2386487e091c833cd2e6774cb6c3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 1 Dec 2022 09:24:25 -0800 Subject: [PATCH 848/963] ANDROID: scsi: ufs: Fix the command completion path ufshcd_crypto_clear_prdt() uses the lrbp->cmd pointer and hence must be called before that pointer is cleared. This patch brings the android-mainline command completion code in sync with the android14-5.15 branch. Bug: 259876933 Fixes: 8aa0abda8bd6 ("Merge e1a7aa25ff45 ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi") into android-mainline") Change-Id: I48852eb6623219bb249c4b309c48a651b6815b27 Signed-off-by: Bart Van Assche --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ad93e3868e47..f851dc08d249 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5366,6 +5366,7 @@ static void ufshcd_release_scsi_cmd(struct ufs_hba *hba, struct scsi_cmnd *cmd = lrbp->cmd; scsi_dma_unmap(cmd); + ufshcd_crypto_clear_prdt(hba, lrbp); lrbp->cmd = NULL; /* Mark the command as completed. */ ufshcd_release(hba); ufshcd_clk_scaling_update_busy(hba); @@ -5395,7 +5396,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, ufshcd_add_command_trace(hba, index, UFS_CMD_COMP); cmd->result = ufshcd_transfer_rsp_status(hba, lrbp); ufshcd_release_scsi_cmd(hba, lrbp); - ufshcd_crypto_clear_prdt(hba, lrbp); /* Do not touch lrbp after scsi done */ scsi_done(cmd); } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || From b8b43a4c2e9f66481cc667007aac6d0372e9c4cb Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 1 Dec 2022 11:52:48 -0800 Subject: [PATCH 849/963] KVM: Move halt-polling documentation into common directory Move halt-polling.rst into the common KVM documentation directory and out of the x86-specific directory. Halt-polling is a common feature and the existing documentation is already written as such. Signed-off-by: David Matlack Message-Id: <20221201195249.3369720-2-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/{x86 => }/halt-polling.rst | 0 Documentation/virt/kvm/index.rst | 1 + Documentation/virt/kvm/x86/index.rst | 1 - 3 files changed, 1 insertion(+), 1 deletion(-) rename Documentation/virt/kvm/{x86 => }/halt-polling.rst (100%) diff --git a/Documentation/virt/kvm/x86/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst similarity index 100% rename from Documentation/virt/kvm/x86/halt-polling.rst rename to Documentation/virt/kvm/halt-polling.rst diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index e0a2c74e1043..ad13ec55ddfe 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -17,4 +17,5 @@ KVM locking vcpu-requests + halt-polling review-checklist diff --git a/Documentation/virt/kvm/x86/index.rst b/Documentation/virt/kvm/x86/index.rst index 7ff588826b9f..9ece6b8dc817 100644 --- a/Documentation/virt/kvm/x86/index.rst +++ b/Documentation/virt/kvm/x86/index.rst @@ -10,7 +10,6 @@ KVM for x86 systems amd-memory-encryption cpuid errata - halt-polling hypercalls mmu msr From 34e30ebbe48cc43c14276f863f0d2995c8f13186 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 1 Dec 2022 11:52:49 -0800 Subject: [PATCH 850/963] KVM: Document the interaction between KVM_CAP_HALT_POLL and halt_poll_ns Clarify the existing documentation about how KVM_CAP_HALT_POLL and halt_poll_ns interact to make it clear that VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns. Signed-off-by: David Matlack Message-Id: <20221201195249.3369720-3-dmatlack@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 13 ++++++------- Documentation/virt/kvm/halt-polling.rst | 13 +++++++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..896914e3a847 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7213,14 +7213,13 @@ veto the transition. :Parameters: args[0] is the maximum poll time in nanoseconds :Returns: 0 on success; -1 on error -This capability overrides the kvm module parameter halt_poll_ns for the -target VM. +KVM_CAP_HALT_POLL overrides the kvm.halt_poll_ns module parameter to set the +maximum halt-polling time for all vCPUs in the target VM. This capability can +be invoked at any time and any number of times to dynamically change the +maximum halt-polling time. -VCPU polling allows a VCPU to poll for wakeup events instead of immediately -scheduling during guest halts. The maximum time a VCPU can spend polling is -controlled by the kvm module parameter halt_poll_ns. This capability allows -the maximum halt time to specified on a per-VM basis, effectively overriding -the module parameter for the target VM. +See Documentation/virt/kvm/halt-polling.rst for more information on halt +polling. 7.21 KVM_CAP_X86_USER_SPACE_MSR ------------------------------- diff --git a/Documentation/virt/kvm/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst index 4922e4a15f18..3fae39b1a5ba 100644 --- a/Documentation/virt/kvm/halt-polling.rst +++ b/Documentation/virt/kvm/halt-polling.rst @@ -119,6 +119,19 @@ These module parameters can be set from the debugfs files in: Note: that these module parameters are system wide values and are not able to be tuned on a per vm basis. +Any changes to these parameters will be picked up by new and existing vCPUs the +next time they halt, with the notable exception of VMs using KVM_CAP_HALT_POLL +(see next section). + +KVM_CAP_HALT_POLL +================= + +KVM_CAP_HALT_POLL is a VM capability that allows userspace to override halt_poll_ns +on a per-VM basis. VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns completely (but +still obey halt_poll_ns_grow, halt_poll_ns_grow_start, and halt_poll_ns_shrink). + +See Documentation/virt/kvm/api.rst for more information on this capability. + Further Notes ============= From 42d7731e3e7409f9444ff44e30c025958f1b14f0 Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Sat, 29 Oct 2022 22:24:52 +0200 Subject: [PATCH 851/963] Bluetooth: btusb: Fix CSR clones again by re-adding ERR_DATA_REPORTING quirk A patch series by a Qualcomm engineer essentially removed my quirk/workaround because they thought it was unnecessary. It wasn't, and it broke everything again: https://patchwork.kernel.org/project/netdevbpf/list/?series=661703&archive=both&state=* He argues that the quirk is not necessary because the code should check if the dongle says if it's supported or not. The problem is that for these Chinese CSR clones they say that it would work: = New Index: 00:00:00:00:00:00 (Primary,USB,hci0) = Open Index: 00:00:00:00:00:00 < HCI Command: Read Local Version Information (0x04|0x0001) plen 0 > HCI Event: Command Complete (0x0e) plen 12 > [hci0] 11.276039 Read Local Version Information (0x04|0x0001) ncmd 1 Status: Success (0x00) HCI version: Bluetooth 5.0 (0x09) - Revision 2064 (0x0810) LMP version: Bluetooth 5.0 (0x09) - Subversion 8978 (0x2312) Manufacturer: Cambridge Silicon Radio (10) ... < HCI Command: Read Local Supported Features (0x04|0x0003) plen 0 > HCI Event: Command Complete (0x0e) plen 68 > [hci0] 11.668030 Read Local Supported Commands (0x04|0x0002) ncmd 1 Status: Success (0x00) Commands: 163 entries ... Read Default Erroneous Data Reporting (Octet 18 - Bit 2) Write Default Erroneous Data Reporting (Octet 18 - Bit 3) ... ... < HCI Command: Read Default Erroneous Data Reporting (0x03|0x005a) plen 0 = Close Index: 00:1A:7D:DA:71:XX So bring it back wholesale. Fixes: 63b1a7dd38bf ("Bluetooth: hci_sync: Remove HCI_QUIRK_BROKEN_ERR_DATA_REPORTING") Fixes: e168f6900877 ("Bluetooth: btusb: Remove HCI_QUIRK_BROKEN_ERR_DATA_REPORTING for fake CSR") Fixes: 766ae2422b43 ("Bluetooth: hci_sync: Check LMP feature bit instead of quirk") Cc: stable@vger.kernel.org Cc: Zijun Hu Cc: Luiz Augusto von Dentz Cc: Hans de Goede Tested-by: Ismael Ferreras Morezuelas Signed-off-by: Ismael Ferreras Morezuelas Reviewed-by: Hans de Goede Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 1 + include/net/bluetooth/hci.h | 11 +++++++++++ net/bluetooth/hci_sync.c | 9 +++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 271963805a38..59d947dec36e 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2118,6 +2118,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) * without these the controller will lock up. */ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e004ba04a9ae..0fe789f6a653 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -228,6 +228,17 @@ enum { */ HCI_QUIRK_VALID_LE_STATES, + /* When this quirk is set, then erroneous data reporting + * is ignored. This is mainly due to the fact that the HCI + * Read Default Erroneous Data Reporting command is advertised, + * but not supported; these controllers often reply with unknown + * command and tend to lock up randomly. Needing a hard reset. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + /* * When this quirk is set, then the hci_suspend_notifier is not * registered. This is intended for devices which drop completely diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 76c3107c9f91..a91145e167f2 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3780,7 +3780,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4298,7 +4299,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; if (enabled == hdev->err_data_reporting) @@ -4457,6 +4459,9 @@ static const struct { HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), + HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, + "HCI Read Default Erroneous Data Reporting command is " + "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), From 955aebd445e2b49622f2184b7abb82b05c060549 Mon Sep 17 00:00:00 2001 From: Ismael Ferreras Morezuelas Date: Sat, 29 Oct 2022 22:24:53 +0200 Subject: [PATCH 852/963] Bluetooth: btusb: Add debug message for CSR controllers The rationale of showing this is that it's potentially critical information to diagnose and find more CSR compatibility bugs in the future and it will save a lot of headaches. Given that clones come from a wide array of vendors (some are actually Barrot, some are something else) and these numbers are what let us find differences between actual and fake ones, it will be immensely helpful to scour the Internet looking for this pattern and building an actual database to find correlations and improve the checks. Cc: stable@vger.kernel.org Cc: Hans de Goede Signed-off-by: Ismael Ferreras Morezuelas Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 59d947dec36e..f05018988a17 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2056,6 +2056,11 @@ static int btusb_setup_csr(struct hci_dev *hdev) rp = (struct hci_rp_read_local_version *)skb->data; + bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u", + le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev), + le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver), + le16_to_cpu(rp->manufacturer)); + /* Detect a wide host of Chinese controllers that aren't CSR. * * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 From 747da1308bdd5021409974f9180f0d8ece53d142 Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Wed, 9 Nov 2022 17:37:26 +0800 Subject: [PATCH 853/963] Bluetooth: 6LoWPAN: add missing hci_dev_put() in get_l2cap_conn() hci_get_route() takes reference, we should use hci_dev_put() to release it when not need anymore. Fixes: 6b8d4a6a0314 ("Bluetooth: 6LoWPAN: Use connected oriented channel instead of fixed one") Signed-off-by: Wang ShaoBo Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 215af9b3b589..c57d643afb10 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); + hci_dev_put(hdev); if (!hcon) return -ENOENT; From 7e7df2c10c92cab7d1dde3b301e584e2e877fbda Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Wed, 9 Nov 2022 10:39:06 +0800 Subject: [PATCH 854/963] Bluetooth: hci_conn: add missing hci_dev_put() in iso_listen_bis() hci_get_route() takes reference, we should use hci_dev_put() to release it when not need anymore. Fixes: f764a6c2c1e4 ("Bluetooth: ISO: Add broadcast support") Signed-off-by: Wang ShaoBo Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index f825857db6d0..26db929b97c4 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk) iso_pi(sk)->bc_sid); hci_dev_unlock(hdev); + hci_dev_put(hdev); return err; } From 696bd3622138bfad0dda9ad7311886b8e6f5ea10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Wed, 16 Nov 2022 21:28:56 +0100 Subject: [PATCH 855/963] Bluetooth: silence a dmesg error message in hci_request.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On kernel 6.1-rcX, I have been getting the following dmesg error message on every boot, resume from suspend and rfkill unblock of the Bluetooth device: Bluetooth: hci0: HCI_REQ-0xfcf0 After some investigation, it turned out to be caused by commit dd50a864ffae ("Bluetooth: Delete unreferenced hci_request code") which modified hci_req_add() in net/bluetooth/hci_request.c to always print an error message when it is executed. In my case, the function was executed by msft_set_filter_enable() in net/bluetooth/msft.c, which provides support for Microsoft vendor opcodes. As explained by Brian Gix, "the error gets logged because it is using a deprecated (but still working) mechanism to issue HCI opcodes" [1]. So this is just a debugging tool to show that a deprecated function is executed. As such, it should not be included in the mainline kernel. See for example commit 771c035372a0 ("deprecate the '__deprecated' attribute warnings entirely and for good") Additionally, this error message is cryptic and the user is not able to do anything about it. [1] Link: https://lore.kernel.org/lkml/beb8dcdc3aee4c5c833aa382f35995f17e7961a1.camel@intel.com/ Fixes: dd50a864ffae ("Bluetooth: Delete unreferenced hci_request code") Signed-off-by: Mateusz Jończyk Cc: Brian Gix Cc: Luiz Augusto von Dentz Cc: Marcel Holtmann Cc: Johan Hedberg Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5a0296a4352e..f7e006a36382 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param) { - bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode); + bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); hci_req_add_ev(req, opcode, plen, param, 0); } From bcd70260ef56e0aee8a4fc6cd214a419900b0765 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Fri, 18 Nov 2022 15:01:47 -0500 Subject: [PATCH 856/963] Bluetooth: L2CAP: Fix u8 overflow By keep sending L2CAP_CONF_REQ packets, chan->num_conf_rsp increases multiple times and eventually it will wrap around the maximum number (i.e., 255). This patch prevents this by adding a boundary check with L2CAP_MAX_CONF_RSP Btmon log: Bluetooth monitor ver 5.64 = Note: Linux version 6.1.0-rc2 (x86_64) 0.264594 = Note: Bluetooth subsystem version 2.22 0.264636 @ MGMT Open: btmon (privileged) version 1.22 {0x0001} 0.272191 = New Index: 00:00:00:00:00:00 (Primary,Virtual,hci0) [hci0] 13.877604 @ RAW Open: 9496 (privileged) version 2.22 {0x0002} 13.890741 = Open Index: 00:00:00:00:00:00 [hci0] 13.900426 (...) > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #32 [hci0] 14.273106 invalid packet size (12 != 1033) 08 00 01 00 02 01 04 00 01 10 ff ff ............ > ACL Data RX: Handle 200 flags 0x00 dlen 1547 #33 [hci0] 14.273561 invalid packet size (14 != 1547) 0a 00 01 00 04 01 06 00 40 00 00 00 00 00 ........@..... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #34 [hci0] 14.274390 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 00 00 00 04 ........@....... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #35 [hci0] 14.274932 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 07 00 03 00 ........@....... = bluetoothd: Bluetooth daemon 5.43 14.401828 > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #36 [hci0] 14.275753 invalid packet size (12 != 1033) 08 00 01 00 04 01 04 00 40 00 00 00 ........@... Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9c24947aa41e..9fdede5fe71c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; From 93df7d56f15e217009323c0fbb5213ab7a14520b Mon Sep 17 00:00:00 2001 From: Chethan T N Date: Tue, 22 Nov 2022 14:32:56 +0530 Subject: [PATCH 857/963] Bluetooth: Remove codec id field in vendor codec definition As per the specfication vendor codec id is defined. BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 4, Part E page 2127 Fixes: 9ae664028a9e ("Bluetooth: Add support for Read Local Supported Codecs V2") Signed-off-by: Chethan T N Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 0fe789f6a653..684f1cd28730 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1435,7 +1435,6 @@ struct hci_std_codecs_v2 { } __packed; struct hci_vnd_codec_v2 { - __u8 id; __le16 cid; __le16 vid; __u8 transport; From 828cea2b71de501827f62d3c92d149f6052ad01e Mon Sep 17 00:00:00 2001 From: Chethan T N Date: Tue, 22 Nov 2022 14:32:57 +0530 Subject: [PATCH 858/963] Bluetooth: Fix support for Read Local Supported Codecs V2 Handling of Read Local Supported Codecs was broken during the HCI serialization design change patches. Fixes: d0b137062b2d ("Bluetooth: hci_sync: Rework init stages") Signed-off-by: Chethan T N Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_codec.c | 19 ++++++++++--------- net/bluetooth/hci_sync.c | 10 ++++++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c index 38201532f58e..3cc135bb1d30 100644 --- a/net/bluetooth/hci_codec.c +++ b/net/bluetooth/hci_codec.c @@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport, continue; } - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, - sizeof(*cmd), cmd, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, + sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read codec capabilities (%ld)", PTR_ERR(skb)); @@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", @@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) for (i = 0; i < std_codecs->num; i++) { caps.id = std_codecs->codec[i]; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) @@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) caps.cid = vnd_codecs->codec[i].cid; caps.vid = vnd_codecs->codec[i].vid; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } error: @@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index a91145e167f2..1fc693122a47 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -12,6 +12,7 @@ #include #include "hci_request.h" +#include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" #include "eir.h" @@ -4239,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) /* Read local codec list if the HCI command is supported */ static int hci_read_local_codecs_sync(struct hci_dev *hdev) { - if (!(hdev->commands[29] & 0x20)) - return 0; + if (hdev->commands[45] & 0x04) + hci_read_supported_codecs_v2(hdev); + else if (hdev->commands[29] & 0x20) + hci_read_supported_codecs(hdev); - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + return 0; } /* Read local pairing options if the HCI command is supported */ From 2f3957c7eb4e07df944169a3e50a4d6790e1c744 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Tue, 29 Nov 2022 17:25:56 +0800 Subject: [PATCH 859/963] Bluetooth: Fix not cleanup led when bt_init fails bt_init() calls bt_leds_init() to register led, but if it fails later, bt_leds_cleanup() is not called to unregister it. This can cause panic if the argument "bluetooth-power" in text is freed and then another led_trigger_register() tries to access it: BUG: unable to handle page fault for address: ffffffffc06d3bc0 RIP: 0010:strcmp+0xc/0x30 Call Trace: led_trigger_register+0x10d/0x4f0 led_trigger_register_simple+0x7d/0x100 bt_init+0x39/0xf7 [bluetooth] do_one_initcall+0xd0/0x4e0 Fixes: e64c97b53bc6 ("Bluetooth: Add combined LED trigger for controller power") Signed-off-by: Chen Zhongjin Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/af_bluetooth.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index dc65974f5adb..1c3c7ff5c3c6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -737,7 +737,7 @@ static int __init bt_init(void) err = bt_sysfs_init(); if (err < 0) - return err; + goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) @@ -773,6 +773,8 @@ static int __init bt_init(void) sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); +cleanup_led: + bt_leds_cleanup(); return err; } From b5ca338751ad4783ec8d37b5d99c3e37b7813e59 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 29 Nov 2022 12:54:13 -0800 Subject: [PATCH 860/963] Bluetooth: Fix crash when replugging CSR fake controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems fake CSR 5.0 clones can cause the suspend notifier to be registered twice causing the following kernel panic: [ 71.986122] Call Trace: [ 71.986124] [ 71.986125] blocking_notifier_chain_register+0x33/0x60 [ 71.986130] hci_register_dev+0x316/0x3d0 [bluetooth 99b5497ea3d09708fa1366c1dc03288bf3cca8da] [ 71.986154] btusb_probe+0x979/0xd85 [btusb e1e0605a4f4c01984a4b9c8ac58c3666ae287477] [ 71.986159] ? __pm_runtime_set_status+0x1a9/0x300 [ 71.986162] ? ktime_get_mono_fast_ns+0x3e/0x90 [ 71.986167] usb_probe_interface+0xe3/0x2b0 [ 71.986171] really_probe+0xdb/0x380 [ 71.986174] ? pm_runtime_barrier+0x54/0x90 [ 71.986177] __driver_probe_device+0x78/0x170 [ 71.986180] driver_probe_device+0x1f/0x90 [ 71.986183] __device_attach_driver+0x89/0x110 [ 71.986186] ? driver_allows_async_probing+0x70/0x70 [ 71.986189] bus_for_each_drv+0x8c/0xe0 [ 71.986192] __device_attach+0xb2/0x1e0 [ 71.986195] bus_probe_device+0x92/0xb0 [ 71.986198] device_add+0x422/0x9a0 [ 71.986201] ? sysfs_merge_group+0xd4/0x110 [ 71.986205] usb_set_configuration+0x57a/0x820 [ 71.986208] usb_generic_driver_probe+0x4f/0x70 [ 71.986211] usb_probe_device+0x3a/0x110 [ 71.986213] really_probe+0xdb/0x380 [ 71.986216] ? pm_runtime_barrier+0x54/0x90 [ 71.986219] __driver_probe_device+0x78/0x170 [ 71.986221] driver_probe_device+0x1f/0x90 [ 71.986224] __device_attach_driver+0x89/0x110 [ 71.986227] ? driver_allows_async_probing+0x70/0x70 [ 71.986230] bus_for_each_drv+0x8c/0xe0 [ 71.986232] __device_attach+0xb2/0x1e0 [ 71.986235] bus_probe_device+0x92/0xb0 [ 71.986237] device_add+0x422/0x9a0 [ 71.986239] ? _dev_info+0x7d/0x98 [ 71.986242] ? blake2s_update+0x4c/0xc0 [ 71.986246] usb_new_device.cold+0x148/0x36d [ 71.986250] hub_event+0xa8a/0x1910 [ 71.986255] process_one_work+0x1c4/0x380 [ 71.986259] worker_thread+0x51/0x390 [ 71.986262] ? rescuer_thread+0x3b0/0x3b0 [ 71.986264] kthread+0xdb/0x110 [ 71.986266] ? kthread_complete_and_exit+0x20/0x20 [ 71.986268] ret_from_fork+0x1f/0x30 [ 71.986273] [ 71.986274] ---[ end trace 0000000000000000 ]--- [ 71.986284] btusb: probe of 2-1.6:1.0 failed with error -17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=216683 Cc: stable@vger.kernel.org Signed-off-by: Luiz Augusto von Dentz Tested-by: Leonardo Eugênio --- net/bluetooth/hci_core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0540555b3704..d97fac4f7130 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + if (!hdev->suspend_notifier.notifier_call && + !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } @@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) + if (hdev->suspend_notifier.notifier_call) { ret = unregister_pm_notifier(&hdev->suspend_notifier); + if (!ret) + hdev->suspend_notifier.notifier_call = NULL; + } return ret; } From 6647e76ab623b2b3fb2efe03a86e9c9046c52c33 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Nov 2022 16:10:52 -0800 Subject: [PATCH 861/963] v4l2: don't fall back to follow_pfn() if pin_user_pages_fast() fails The V4L2_MEMORY_USERPTR interface is long deprecated and shouldn't be used (and is discouraged for any modern v4l drivers). And Seth Jenkins points out that the fallback to VM_PFNMAP/VM_IO is fundamentally racy and dangerous. Note that it's not even a case that should trigger, since any normal user pointer logic ends up just using the pin_user_pages_fast() call that does the proper page reference counting. That's not the problem case, only if you try to use special device mappings do you have any issues. Normally I'd just remove this during the merge window, but since Seth pointed out the problem cases, we really want to know as soon as possible if there are actually any users of this odd special case of a legacy interface. Neither Hans nor Mauro seem to think that such mis-uses of the old legacy interface should exist. As Mauro says: "See, V4L2 has actually 4 streaming APIs: - Kernel-allocated mmap (usually referred simply as just mmap); - USERPTR mmap; - read(); - dmabuf; The USERPTR is one of the oldest way to use it, coming from V4L version 1 times, and by far the least used one" And Hans chimed in on the USERPTR interface: "To be honest, I wouldn't mind if it goes away completely, but that's a bit of a pipe dream right now" but while removing this legacy interface entirely may be a pipe dream we can at least try to remove the unlikely (and actively broken) case of using special device mappings for USERPTR accesses. This replaces it with a WARN_ONCE() that we can remove once we've hopefully confirmed that no actual users exist. NOTE! Longer term, this means that a 'struct frame_vector' only ever contains proper page pointers, and all the games we have with converting them to pages can go away (grep for 'frame_vector_to_pages()' and the uses of 'vec->is_pfns'). But this is just the first step, to verify that this code really is all dead, and do so as quickly as possible. Reported-by: Seth Jenkins Acked-by: Hans Verkuil Acked-by: Mauro Carvalho Chehab Cc: David Hildenbrand Cc: Jan Kara Signed-off-by: Linus Torvalds --- drivers/media/common/videobuf2/frame_vector.c | 64 +++---------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); + if (likely(ret > 0)) + return ret; - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); From 8c9a59939deb4bfafdc451100c03d1e848b4169b Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Fri, 2 Dec 2022 15:37:46 -0800 Subject: [PATCH 862/963] Input: raydium_ts_i2c - fix memory leak in raydium_i2c_send() There is a kmemleak when test the raydium_i2c_ts with bpf mock device: unreferenced object 0xffff88812d3675a0 (size 8): comm "python3", pid 349, jiffies 4294741067 (age 95.695s) hex dump (first 8 bytes): 11 0e 10 c0 01 00 04 00 ........ backtrace: [<0000000068427125>] __kmalloc+0x46/0x1b0 [<0000000090180f91>] raydium_i2c_send+0xd4/0x2bf [raydium_i2c_ts] [<000000006e631aee>] raydium_i2c_initialize.cold+0xbc/0x3e4 [raydium_i2c_ts] [<00000000dc6fcf38>] raydium_i2c_probe+0x3cd/0x6bc [raydium_i2c_ts] [<00000000a310de16>] i2c_device_probe+0x651/0x680 [<00000000f5a96bf3>] really_probe+0x17c/0x3f0 [<00000000096ba499>] __driver_probe_device+0xe3/0x170 [<00000000c5acb4d9>] driver_probe_device+0x49/0x120 [<00000000264fe082>] __device_attach_driver+0xf7/0x150 [<00000000f919423c>] bus_for_each_drv+0x114/0x180 [<00000000e067feca>] __device_attach+0x1e5/0x2d0 [<0000000054301fc2>] bus_probe_device+0x126/0x140 [<00000000aad93b22>] device_add+0x810/0x1130 [<00000000c086a53f>] i2c_new_client_device+0x352/0x4e0 [<000000003c2c248c>] of_i2c_register_device+0xf1/0x110 [<00000000ffec4177>] of_i2c_notify+0x100/0x160 unreferenced object 0xffff88812d3675c8 (size 8): comm "python3", pid 349, jiffies 4294741070 (age 95.692s) hex dump (first 8 bytes): 22 00 36 2d 81 88 ff ff ".6-.... backtrace: [<0000000068427125>] __kmalloc+0x46/0x1b0 [<0000000090180f91>] raydium_i2c_send+0xd4/0x2bf [raydium_i2c_ts] [<000000001d5c9620>] raydium_i2c_initialize.cold+0x223/0x3e4 [raydium_i2c_ts] [<00000000dc6fcf38>] raydium_i2c_probe+0x3cd/0x6bc [raydium_i2c_ts] [<00000000a310de16>] i2c_device_probe+0x651/0x680 [<00000000f5a96bf3>] really_probe+0x17c/0x3f0 [<00000000096ba499>] __driver_probe_device+0xe3/0x170 [<00000000c5acb4d9>] driver_probe_device+0x49/0x120 [<00000000264fe082>] __device_attach_driver+0xf7/0x150 [<00000000f919423c>] bus_for_each_drv+0x114/0x180 [<00000000e067feca>] __device_attach+0x1e5/0x2d0 [<0000000054301fc2>] bus_probe_device+0x126/0x140 [<00000000aad93b22>] device_add+0x810/0x1130 [<00000000c086a53f>] i2c_new_client_device+0x352/0x4e0 [<000000003c2c248c>] of_i2c_register_device+0xf1/0x110 [<00000000ffec4177>] of_i2c_notify+0x100/0x160 After BANK_SWITCH command from i2c BUS, no matter success or error happened, the tx_buf should be freed. Fixes: 3b384bd6c3f2 ("Input: raydium_ts_i2c - do not split tx transactions") Signed-off-by: Zhang Xiaoxu Link: https://lore.kernel.org/r/20221202103412.2120169-1-zhangxiaoxu5@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/raydium_i2c_ts.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 3a4952935366..3d9c5758d8a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } From 66065157420c5b9b3f078f43d313c153e1ff7f83 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 30 Nov 2022 07:25:51 -0800 Subject: [PATCH 863/963] x86/bugs: Make sure MSR_SPEC_CTRL is updated properly upon resume from S3 The "force" argument to write_spec_ctrl_current() is currently ambiguous as it does not guarantee the MSR write. This is due to the optimization that writes to the MSR happen only when the new value differs from the cached value. This is fine in most cases, but breaks for S3 resume when the cached MSR value gets out of sync with the hardware MSR value due to S3 resetting it. When x86_spec_ctrl_current is same as x86_spec_ctrl_base, the MSR write is skipped. Which results in SPEC_CTRL mitigations not getting restored. Move the MSR write from write_spec_ctrl_current() to a new function that unconditionally writes to the MSR. Update the callers accordingly and rename functions. [ bp: Rework a bit. ] Fixes: caa0ff24d5d0 ("x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value") Suggested-by: Borislav Petkov Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Cc: Link: https://lore.kernel.org/r/806d39b0bfec2fe8f50dc5446dff20f5bb24a959.1669821572.git.pawan.kumar.gupta@linux.intel.com Signed-off-by: Linus Torvalds --- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 21 ++++++++++++++------- arch/x86/kernel/process.c | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e3230cccaa7..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) From 3d8fdcbf1f42e2bb9ae8b8c0b6f202278c788a22 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 1 Dec 2022 17:00:30 +0300 Subject: [PATCH 864/963] net: dsa: ksz: Check return value Return NULL if we got unexpected value from skb_trim_rcsum() in ksz_common_rcv() Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: bafe9ba7d908 ("net: dsa: ksz: Factor out common tag code") Signed-off-by: Artem Chernyshev Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221201140032.26746-1-artem.chernyshev@red-soft.ru Signed-off-by: Jakub Kicinski --- net/dsa/tag_ksz.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 38fa19c1e2d5..429250298ac4 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -21,7 +21,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - pskb_trim_rcsum(skb, skb->len - len); + if (pskb_trim_rcsum(skb, skb->len - len)) + return NULL; dsa_default_offload_fwd_mark(skb); From d4edb50688652eb10be270bc515da63815de428f Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 1 Dec 2022 17:00:31 +0300 Subject: [PATCH 865/963] net: dsa: hellcreek: Check return value Return NULL if we got unexpected value from skb_trim_rcsum() in hellcreek_rcv() Fixes: 01ef09caad66 ("net: dsa: Add tag handling for Hirschmann Hellcreek switches") Signed-off-by: Artem Chernyshev Reviewed-by: Florian Fainelli Reviewed-by: Kurt Kanzenbach Link: https://lore.kernel.org/r/20221201140032.26746-2-artem.chernyshev@red-soft.ru Signed-off-by: Jakub Kicinski --- net/dsa/tag_hellcreek.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 846588c0070a..53a206d11685 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -49,7 +49,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, return NULL; } - pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); + if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN)) + return NULL; dsa_default_offload_fwd_mark(skb); From 8948876335b1752176afdff8e704099a3ea0f6e6 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Thu, 1 Dec 2022 17:00:32 +0300 Subject: [PATCH 866/963] net: dsa: sja1105: Check return value Return NULL if we got unexpected value from skb_trim_rcsum() in sja1110_rcv_inband_control_extension() Fixes: 4913b8ebf8a9 ("net: dsa: add support for the SJA1110 native tagging protocol") Signed-off-by: Artem Chernyshev Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221201140032.26746-3-artem.chernyshev@red-soft.ru Signed-off-by: Jakub Kicinski --- net/dsa/tag_sja1105.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 83e4136516b0..1a85125bda6d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -665,7 +665,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, * padding and trailer we need to account for the fact that * skb->data points to skb_mac_header(skb) + ETH_HLEN. */ - pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN); + if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN)) + return NULL; /* Trap-to-host frame, no timestamp trailer */ } else { *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header); From 6648eadba8d6b37c8e6cb1b906f68509b3b39385 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 1 Dec 2022 21:18:52 +0800 Subject: [PATCH 867/963] selftests/tls: Fix tls selftests dependency to correct algorithm Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moves SM3 and SM4 algorithm implementations from stand-alone library to crypto API. The corresponding configuration options for the API version (generic) are CONFIG_CRYPTO_SM3_GENERIC and CONFIG_CRYPTO_SM4_GENERIC, respectively. Replace option selected in selftests configuration from the library version to the API version. Fixes: d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") Reported-by: Hangbin Liu Cc: Jason A. Donenfeld Cc: stable@vger.kernel.org # v5.19+ Signed-off-by: Tianjia Zhang Link: https://lore.kernel.org/r/20221201131852.38501-1-tianjia.zhang@linux.alibaba.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9bf0..bd89198cd817 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m From 85a0506c073332a3057f5a9635fa0d4db5a8e03b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 1 Dec 2022 16:22:46 +0800 Subject: [PATCH 868/963] selftests: rtnetlink: correct xfrm policy rule in kci_test_ipsec_offload When testing in kci_test_ipsec_offload, srcip is configured as $dstip, it should add xfrm policy rule in instead of out. The test result of this patch is as follows: PASS: ipsec_offload Fixes: 2766a11161cc ("selftests: rtnetlink: add ipsec offload API test") Signed-off-by: Zhengchao Shao Acked-by: Hangbin Liu Link: https://lore.kernel.org/r/20221201082246.14131-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 0900c5438fbb..275491be3da2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? From 6a30d3e3491dc562384e9f15b201a8a25b57439f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 1 Dec 2022 11:10:48 +0800 Subject: [PATCH 869/963] selftests: net: Use "grep -E" instead of "egrep" The latest version of grep claims the egrep is now obsolete so the build now contains warnings that look like: egrep: warning: egrep is obsolescent; using grep -E fix this using "grep -E" instead. sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/testing/selftests/net` Here are the steps to install the latest grep: wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz tar xf grep-3.8.tar.gz cd grep-3.8 && ./configure && make sudo make install export PATH=/usr/local/bin:$PATH Signed-off-by: Tiezhu Yang Link: https://lore.kernel.org/r/1669864248-829-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/toeplitz.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index 0a49907cd4fe..da5bfd834eff 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -32,7 +32,7 @@ DEV="eth0" # This is determined by reading the RSS indirection table using ethtool. get_rss_cfg_num_rxqs() { echo $(ethtool -x "${DEV}" | - egrep [[:space:]]+[0-9]+:[[:space:]]+ | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | cut -d: -f2- | awk '{$1=$1};1' | tr ' ' '\n' | From 24013314be6ee4ee456114a671e9fa3461323de8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 30 Nov 2022 10:57:47 -0800 Subject: [PATCH 870/963] drm/shmem-helper: Remove errant put in error path drm_gem_shmem_mmap() doesn't own this reference, resulting in the GEM object getting prematurely freed leading to a later use-after-free. Link: https://syzkaller.appspot.com/bug?extid=c8ae65286134dd1b800d Reported-by: syzbot+c8ae65286134dd1b800d@syzkaller.appspotmail.com Fixes: 2194a63a818d ("drm: Add library for shmem backed GEM objects") Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221130185748.357410-2-robdclark@gmail.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 35138f8a375c..3b7b71391a4c 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -622,10 +622,8 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct } ret = drm_gem_shmem_get_pages(shmem); - if (ret) { - drm_gem_vm_close(vma); + if (ret) return ret; - } vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); From 09bf649a74573cb596e211418a4f8008f265c5a9 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 30 Nov 2022 10:57:48 -0800 Subject: [PATCH 871/963] drm/shmem-helper: Avoid vm_open error paths vm_open() is not allowed to fail. Fortunately we are guaranteed that the pages are already pinned, thanks to the initial mmap which is now being cloned into a forked process, and only need to increment the refcnt. So just increment it directly. Previously if a signal was delivered at the wrong time to the forking process, the mutex_lock_interruptible() could fail resulting in the pages_use_count not being incremented. Fixes: 2194a63a818d ("drm: Add library for shmem backed GEM objects") Cc: stable@vger.kernel.org Signed-off-by: Rob Clark Reviewed-by: Daniel Vetter Signed-off-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221130185748.357410-3-robdclark@gmail.com --- drivers/gpu/drm/drm_gem_shmem_helper.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 3b7b71391a4c..b602cd72a120 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -571,12 +571,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); - int ret; WARN_ON(shmem->base.import_attach); - ret = drm_gem_shmem_get_pages(shmem); - WARN_ON_ONCE(ret != 0); + mutex_lock(&shmem->pages_lock); + + /* + * We should have already pinned the pages when the buffer was first + * mmap'd, vm_open() just grabs an additional reference for the new + * mm the vma is getting copied into (ie. on fork()). + */ + if (!WARN_ON_ONCE(!shmem->pages_use_count)) + shmem->pages_use_count++; + + mutex_unlock(&shmem->pages_lock); drm_gem_vm_open(vma); } From 23393c6461422df5bf8084a086ada9a7e17dc2ba Mon Sep 17 00:00:00 2001 From: Jan Dabros Date: Mon, 28 Nov 2022 20:56:51 +0100 Subject: [PATCH 872/963] char: tpm: Protect tpm_pm_suspend with locks Currently tpm transactions are executed unconditionally in tpm_pm_suspend() function, which may lead to races with other tpm accessors in the system. Specifically, the hw_random tpm driver makes use of tpm_get_random(), and this function is called in a loop from a kthread, which means it's not frozen alongside userspace, and so can race with the work done during system suspend: tpm tpm0: tpm_transmit: tpm_recv: error -52 tpm tpm0: invalid TPM_STS.x 0xff, dumping stack for forensics CPU: 0 PID: 1 Comm: init Not tainted 6.1.0-rc5+ #135 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 Call Trace: tpm_tis_status.cold+0x19/0x20 tpm_transmit+0x13b/0x390 tpm_transmit_cmd+0x20/0x80 tpm1_pm_suspend+0xa6/0x110 tpm_pm_suspend+0x53/0x80 __pnp_bus_suspend+0x35/0xe0 __device_suspend+0x10f/0x350 Fix this by calling tpm_try_get_ops(), which itself is a wrapper around tpm_chip_start(), but takes the appropriate mutex. Signed-off-by: Jan Dabros Reported-by: Vlastimil Babka Tested-by: Jason A. Donenfeld Tested-by: Vlastimil Babka Link: https://lore.kernel.org/all/c5ba47ef-393f-1fba-30bd-1230d1b4b592@suse.cz/ Cc: stable@vger.kernel.org Fixes: e891db1a18bf ("tpm: turn on TPM on suspend for TPM 1.x") [Jason: reworked commit message, added metadata] Signed-off-by: Jason A. Donenfeld Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- drivers/char/tpm/tpm-interface.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1621ce818705..d69905233aff 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - if (!tpm_chip_start(chip)) { + rc = tpm_try_get_ops(chip); + if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); else rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - tpm_chip_stop(chip); + tpm_put_ops(chip); } suspended: From 0ba09b1733878afe838fe35c310715fda3d46428 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Dec 2022 12:51:59 -0800 Subject: [PATCH 873/963] Revert "mm: align larger anonymous mappings on THP boundaries" This reverts commit f35b5d7d676e59e401690b678cd3cfec5e785c23. It has been reported to cause huge performance regressions on some loads (will-it-scale.per_process_ops, but also building the kernel with clang). The commit did speed up gcc builds by a small amount, so it's not an unambiguous regression, but until the big regressions are understood, let's revert it. Reported-by: kernel test robot Link: https://lore.kernel.org/r/202210181535.7144dd15-yujie.liu@intel.com Reported-by: Nathan Chancellor Link: https://lore.kernel.org/lkml/Y1DNQaoPWxE%2BrGce@dev-arch.thelio-3990X/ Cc: Huang, Ying Cc: Rik van Riel Cc: Andrew Morton Cc: Yang Shi Signed-off-by: Linus Torvalds --- mm/mmap.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 74a84eb33b90..a5eb2f175da0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1779,9 +1779,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, */ pgoff = 0; get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - /* Ensures that larger anonymous mappings are THP aligned. */ - get_area = thp_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); From 76dcd734eca23168cb008912c0f69ff408905235 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Dec 2022 14:48:12 -0800 Subject: [PATCH 874/963] Linux 6.1-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 78525ebea876..0992f827888d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* From 5c306de8f787ab7df51f846e57ac79cd713537d5 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Fri, 2 Dec 2022 14:46:46 +0100 Subject: [PATCH 875/963] nfp: correct desc type when header dma len is 4096 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there's only one buffer to dma and its length is 4096, then only one data descriptor is needed to carry it according to current descriptor definition. So the descriptor type should be `simple` instead of `gather`, the latter requires more than one descriptor, otherwise it'll be dropped by application firmware. Fixes: c10d12e3dce8 ("nfp: add support for NFDK data path") Fixes: d9d950490a0a ("nfp: nfdk: implement xdp tx path for NFDK") Signed-off-by: Yinjun Zhang Reviewed-by: Richard Donkin Reviewed-by: Niklas Söderlund Signed-off-by: Simon Horman Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20221202134646.311108-1-simon.horman@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index 2b427d8ccb2f..ccacb6ab6c39 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dma_len = skb_headlen(skb); if (skb_is_gso(skb)) type = NFDK_DESC_TX_TYPE_TSO; - else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len = pkt_len; dma_addr = rxbuf->dma_addr + dma_off; - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf = &tx_ring->ktxbufs[wr_idx]; dma_len = skb_headlen(skb); - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; From b3d72d3135d2ef68296c1ee174436efd65386f04 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 30 Nov 2022 09:17:05 +0000 Subject: [PATCH 876/963] mac802154: fix missing INIT_LIST_HEAD in ieee802154_if_add() Kernel fault injection test reports null-ptr-deref as follows: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:cfg802154_netdev_notifier_call+0x120/0x310 include/linux/list.h:114 Call Trace: raw_notifier_call_chain+0x6d/0xa0 kernel/notifier.c:87 call_netdevice_notifiers_info+0x6e/0xc0 net/core/dev.c:1944 unregister_netdevice_many_notify+0x60d/0xcb0 net/core/dev.c:1982 unregister_netdevice_queue+0x154/0x1a0 net/core/dev.c:10879 register_netdevice+0x9a8/0xb90 net/core/dev.c:10083 ieee802154_if_add+0x6ed/0x7e0 net/mac802154/iface.c:659 ieee802154_register_hw+0x29c/0x330 net/mac802154/main.c:229 mcr20a_probe+0xaaa/0xcb1 drivers/net/ieee802154/mcr20a.c:1316 ieee802154_if_add() allocates wpan_dev as netdev's private data, but not init the list in struct wpan_dev. cfg802154_netdev_notifier_call() manage the list when device register/unregister, and may lead to null-ptr-deref. Use INIT_LIST_HEAD() on it to initialize it correctly. Fixes: fcf39e6e88e9 ("ieee802154: add wpan_dev_list") Signed-off-by: Wei Yongjun Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20221130091705.1831140-1-weiyongjun@huaweicloud.com Signed-off-by: Stefan Schmidt --- net/mac802154/iface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 500ed1b81250..7e2065e72915 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -662,6 +662,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; + INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); From 167b3f2dcc62c271f3555b33df17e361bb1fa0ee Mon Sep 17 00:00:00 2001 From: Valentina Goncharenko Date: Thu, 1 Dec 2022 20:34:07 +0300 Subject: [PATCH 877/963] net: encx24j600: Add parentheses to fix precedence In functions regmap_encx24j600_phy_reg_read() and regmap_encx24j600_phy_reg_write() in the conditions of the waiting cycles for filling the variable 'ret' it is necessary to add parentheses to prevent wrong assignment due to logical operations precedence. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d70e53262f5c ("net: Microchip encx24j600 driver") Signed-off-by: Valentina Goncharenko Reviewed-by: Pavan Chebbi Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 81a8ccca7e5e..2e337c7a5773 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && (mistat & BUSY)) cpu_relax(); From 25f427ac7b8d89b0259f86c0c6407b329df742b2 Mon Sep 17 00:00:00 2001 From: Valentina Goncharenko Date: Thu, 1 Dec 2022 20:34:08 +0300 Subject: [PATCH 878/963] net: encx24j600: Fix invalid logic in reading of MISTAT register A loop for reading MISTAT register continues while regmap_read() fails and (mistat & BUSY), but if regmap_read() fails a value of mistat is undefined. The patch proposes to check for BUSY flag only when regmap_read() succeed. Compile test only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d70e53262f5c ("net: Microchip encx24j600 driver") Signed-off-by: Valentina Goncharenko Reviewed-by: Pavan Chebbi Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/encx24j600-regmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 2e337c7a5773..5693784eec5b 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); From 165df24186ecea95705505627df3dacf5e7ff6bf Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 2 Dec 2022 13:18:33 +0800 Subject: [PATCH 879/963] net: mdiobus: fix double put fwnode in the error path If phy_device_register() or fwnode_mdiobus_phy_device_register() fail, phy_device_free() is called, the device refcount is decreased to 0, then fwnode_handle_put() will be called in phy_device_release(), but in the error path, fwnode_handle_put() has already been called, so set fwnode to NULL after fwnode_handle_put() in the error path to avoid double put. Fixes: cdde1560118f ("net: mdiobus: fix unbalanced node reference count") Reported-by: Zeng Heng Tested-by: Zeng Heng Signed-off-by: Yang Yingliang Reviewed-by: Zeng Heng Tested-by: Zeng Heng Signed-off-by: David S. Miller --- drivers/net/mdio/fwnode_mdio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index eb344f6d4a7b..b782c35c4ac1 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -98,6 +98,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, */ rc = phy_device_register(phy); if (rc) { + device_set_node(&phy->mdio.dev, NULL); fwnode_handle_put(child); return rc; } @@ -153,7 +154,8 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); if (rc) { - fwnode_handle_put(phy->mdio.dev.fwnode); + phy->mdio.dev.fwnode = NULL; + fwnode_handle_put(child); goto clean_phy; } } else if (is_of_node(child)) { From fbf33f5ac76f2cdb47ad9763f620026d5cfa57ce Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Fri, 2 Dec 2022 19:04:30 +0800 Subject: [PATCH 880/963] octeontx2-pf: Fix potential memory leak in otx2_init_tc() In otx2_init_tc(), if rhashtable_init() failed, it does not free tc->tc_entries_bitmap which is allocated in otx2_tc_alloc_ent_bitmap(). Fixes: 2e2a8126ffac ("octeontx2-pf: Unify flow management variables") Signed-off-by: Ziyang Xuan Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index e64318c110fd..6a01ab1a6e6f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -1134,7 +1134,12 @@ int otx2_init_tc(struct otx2_nic *nic) return err; tc->flow_ht_params = tc_flow_ht_params; - return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + if (err) { + kfree(tc->tc_entries_bitmap); + tc->tc_entries_bitmap = NULL; + } + return err; } EXPORT_SYMBOL(otx2_init_tc); From 121c6672b0191ffcebff4b88ec022c39e0a95789 Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Fri, 2 Dec 2022 09:35:44 +0100 Subject: [PATCH 881/963] net: microchip: sparx5: correctly free skb in xmit consume_skb on transmitted, kfree_skb on dropped, do not free on TX_BUSY. Previously the xmit function could return -EBUSY without freeing, which supposedly is interpreted as a drop. And was using kfree on successfully transmitted packets. sparx5_fdma_xmit and sparx5_inject returns error code, where -EBUSY indicates TX_BUSY and any other error code indicates dropped. Fixes: f3cad2611a77 ("net: sparx5: add hostmode with phylink support") Signed-off-by: Casper Andersson Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- .../ethernet/microchip/sparx5/sparx5_fdma.c | 2 +- .../ethernet/microchip/sparx5/sparx5_packet.c | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 66360c8c5a38..141897dfe388 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry); db_hw = &next_dcb_hw->db[0]; if (!(db_hw->status & FDMA_DCB_STATUS_DONE)) - tx->dropped++; + return -EINVAL; db = list_first_entry(&tx->db_list, struct sparx5_db, list); list_move_tail(&db->list, &tx->db_list); next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 83c16ca5b30f..6db6ac6a3bbc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) sparx5_set_port_ifh(ifh, port->portno); if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { - ret = sparx5_ptp_txtstamp_request(port, skb); - if (ret) - return ret; + if (sparx5_ptp_txtstamp_request(port, skb) < 0) + return NETDEV_TX_BUSY; sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); @@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) else ret = sparx5_inject(sparx5, ifh, skb, dev); - if (ret == NETDEV_TX_OK) { - stats->tx_bytes += skb->len; - stats->tx_packets++; + if (ret == -EBUSY) + goto busy; + if (ret < 0) + goto drop; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - return ret; + stats->tx_bytes += skb->len; + stats->tx_packets++; + sparx5->tx.packets++; - dev_kfree_skb_any(skb); - } else { - stats->tx_dropped++; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - sparx5_ptp_txtstamp_release(port, skb); - } - return ret; + dev_consume_skb_any(skb); + return NETDEV_TX_OK; +drop: + stats->tx_dropped++; + sparx5->tx.dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +busy: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); + return NETDEV_TX_BUSY; } static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr) From d50b7914fae04d840ce36491d22133070b18cca9 Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Fri, 2 Dec 2022 08:52:48 +0000 Subject: [PATCH 882/963] xen-netfront: Fix NULL sring after live migration A NAPI is setup for each network sring to poll data to kernel The sring with source host is destroyed before live migration and new sring with target host is setup after live migration. The NAPI for the old sring is not deleted until setup new sring with target host after migration. With busy_poll/busy_read enabled, the NAPI can be polled before got deleted when resume VM. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: xennet_poll+0xae/0xd20 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI Call Trace: finish_task_switch+0x71/0x230 timerqueue_del+0x1d/0x40 hrtimer_try_to_cancel+0xb5/0x110 xennet_alloc_rx_buffers+0x2a0/0x2a0 napi_busy_loop+0xdb/0x270 sock_poll+0x87/0x90 do_sys_poll+0x26f/0x580 tracing_map_insert+0x1d4/0x2f0 event_hist_trigger+0x14a/0x260 finish_task_switch+0x71/0x230 __schedule+0x256/0x890 recalc_sigpending+0x1b/0x50 xen_sched_clock+0x15/0x20 __rb_reserve_next+0x12d/0x140 ring_buffer_lock_reserve+0x123/0x3d0 event_triggers_call+0x87/0xb0 trace_event_buffer_commit+0x1c4/0x210 xen_clocksource_get_cycles+0x15/0x20 ktime_get_ts64+0x51/0xf0 SyS_ppoll+0x160/0x1a0 SyS_ppoll+0x160/0x1a0 do_syscall_64+0x73/0x130 entry_SYSCALL_64_after_hwframe+0x41/0xa6 ... RIP: xennet_poll+0xae/0xd20 RSP: ffffb4f041933900 CR2: 0000000000000008 ---[ end trace f8601785b354351c ]--- xen frontend should remove the NAPIs for the old srings before live migration as the bond srings are destroyed There is a tiny window between the srings are set to NULL and the NAPIs are disabled, It is safe as the NAPI threads are still frozen at that time Signed-off-by: Lin Liu Fixes: 4ec2411980d0 ([NET]: Do not check netif_running() and carrier state in ->poll()) Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 9af2b027c19c..dc404e05970c 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1862,6 +1862,12 @@ static int netfront_resume(struct xenbus_device *dev) netif_tx_unlock_bh(info->netdev); xennet_disconnect_backend(info); + + rtnl_lock(); + if (info->queues) + xennet_destroy_queues(info); + rtnl_unlock(); + return 0; } From e8b4fc13900b8e8be48debffd0dfd391772501f7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 2 Dec 2022 12:58:26 +0300 Subject: [PATCH 883/963] net: mvneta: Prevent out of bounds read in mvneta_config_rss() The pp->indir[0] value comes from the user. It is passed to: if (cpu_online(pp->rxq_def)) inside the mvneta_percpu_elect() function. It needs bounds checkeding to ensure that it is not beyond the end of the cpu bitmap. Fixes: cad5d847a093 ("net: mvneta: Fix the CPU choice in mvneta_percpu_elect") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff3e361e06e7..66b7f27c9a48 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4927,6 +4927,9 @@ static int mvneta_config_rss(struct mvneta_port *pp) napi_disable(&pp->napi); } + if (pp->indir[0] >= nr_cpu_ids) + return -EINVAL; + pp->rxq_def = pp->indir[0]; /* Update unicast mapping */ From 81c95fbaebfa5990c3c786c8c3e87426a33106fe Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Mon, 5 Dec 2022 21:14:12 +0800 Subject: [PATCH 884/963] iommu/vt-d: Fix buggy QAT device mask Impacted QAT device IDs that need extra dtlb flush quirk is ranging from 0x4940 to 0x4943. After bitwise AND device ID with 0xfffc the result should be 0x4940 instead of 0x494c to identify these devices. Fixes: e65a6897be5e ("iommu/vt-d: Add a fix for devices need extra dtlb flush") Reported-by: Raghunathan Srinivasan Signed-off-by: Ashok Raj Signed-off-by: Jacob Pan Link: https://lore.kernel.org/r/20221203005610.2927487-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5287efe247b1..644ca49e8cf8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1402,7 +1402,7 @@ static void domain_update_iotlb(struct dmar_domain *domain) * check because it applies only to the built-in QAT devices and it doesn't * grant additional privileges. */ -#define BUGGY_QAT_DEVID_MASK 0x494c +#define BUGGY_QAT_DEVID_MASK 0x4940 static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) { if (pdev->vendor != PCI_VENDOR_ID_INTEL) From 82e0572b23029b380464fa9fdc125db9c1506d0a Mon Sep 17 00:00:00 2001 From: Michal Jaron Date: Mon, 24 Oct 2022 10:19:42 +0200 Subject: [PATCH 885/963] i40e: Fix not setting default xps_cpus after reset During tx rings configuration default XPS queue config is set and __I40E_TX_XPS_INIT_DONE is locked. __I40E_TX_XPS_INIT_DONE state is cleared and set again with default mapping only during queues build, it means after first setup or reset with queues rebuild. (i.e. ethtool -L combined ) After other resets (i.e. ethtool -t ) XPS_INIT_DONE is not cleared and those default maps cannot be set again. It results in cleared xps_cpus mapping until queues are not rebuild or mapping is not set by user. Add clearing __I40E_TX_XPS_INIT_DONE state during reset to let the driver set xps_cpus to defaults again after it was cleared. Fixes: 6f853d4f8e93 ("i40e: allow XPS with QoS enabled") Signed-off-by: Michal Jaron Signed-off-by: Kamil Maziarz Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b3cb587a2032..6416322d7c18 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10654,6 +10654,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) return 0; } +/** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure @@ -10678,8 +10693,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf) i40e_pf_quiesce_all_vsi(pf); for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) + if (pf->vsi[v]) { + i40e_clean_xps_state(pf->vsi[v]); pf->vsi[v]->seid = 0; + } } i40e_shutdown_adminq(&pf->hw); From 08501970472077ed5de346ad89943a37d1692e9b Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Mon, 31 Oct 2022 13:00:28 +0100 Subject: [PATCH 886/963] i40e: Fix for VF MAC address 0 After spawning max VFs on a PF, some VFs were not getting resources and their MAC addresses were 0. This was caused by PF sleeping before flushing HW registers which caused VIRTCHNL_VFR_VFACTIVE to not be set in time for VF. Fix by adding a sleep after hw flush. Fixes: e4b433f4a741 ("i40e: reset all VFs in parallel when rebuilding PF") Signed-off-by: Sylwester Dziedziuch Signed-off-by: Jan Sokolowski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 72ddcefc45b1..635f93d60318 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); return true; @@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) } i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(__I40E_VF_DISABLE, pf->state); return true; From d64aaf3f7869f915fd120763d75f11d6b116424d Mon Sep 17 00:00:00 2001 From: Przemyslaw Patynowski Date: Tue, 15 Nov 2022 09:49:25 +0100 Subject: [PATCH 887/963] i40e: Disallow ip4 and ip6 l4_4_bytes Return -EOPNOTSUPP, when user requests l4_4_bytes for raw IP4 or IP6 flow director filters. Flow director does not support filtering on l4 bytes for PCTYPEs used by IP4 and IP6 filters. Without this patch, user could create filters with l4_4_bytes fields, which did not do any filtering on L4, but only on L3 fields. Fixes: 36777d9fa24c ("i40e: check current configured input set when adding ntuple filters") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Kamil Maziarz Reviewed-by: Jacob Keller Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 4a6a6e48c615..f6fa63e4253c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4464,11 +4464,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* First 4 bytes of L4 header */ - if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip4_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip4_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Type of Service is not supported. */ @@ -4507,11 +4503,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, else return -EOPNOTSUPP; - if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip6_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip6_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Traffic class is not supported. */ From b52be557e24c47286738276121177a41f54e3b83 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 5 Dec 2022 17:59:27 +0100 Subject: [PATCH 888/963] ipc/sem: Fix dangling sem_array access in semtimedop race When __do_semtimedop() goes to sleep because it has to wait for a semaphore value becoming zero or becoming bigger than some threshold, it links the on-stack sem_queue to the sem_array, then goes to sleep without holding a reference on the sem_array. When __do_semtimedop() comes back out of sleep, one of two things must happen: a) We prove that the on-stack sem_queue has been disconnected from the (possibly freed) sem_array, making it safe to return from the stack frame that the sem_queue exists in. b) We stabilize our reference to the sem_array, lock the sem_array, and detach the sem_queue from the sem_array ourselves. sem_array has RCU lifetime, so for case (b), the reference can be stabilized inside an RCU read-side critical section by locklessly checking whether the sem_queue is still connected to the sem_array. However, the current code does the lockless check on sem_queue before starting an RCU read-side critical section, so the result of the lockless check immediately becomes useless. Fix it by doing rcu_read_lock() before the lockless check. Now RCU ensures that if we observe the object being on our queue, the object can't be freed until rcu_read_unlock(). This bug is only hittable on kernel builds with full preemption support (either CONFIG_PREEMPT or PREEMPT_DYNAMIC with preempt=full). Fixes: 370b262c896e ("ipc/sem: avoid idr tree lookup for interrupted semop") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- ipc/sem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ipc/sem.c b/ipc/sem.c index c8496f98b139..00f88aa01ac5 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2179,14 +2179,15 @@ long __do_semtimedop(int semid, struct sembuf *sops, * scenarios where we were awakened externally, during the * window between wake_q_add() and wake_up_q(). */ + rcu_read_lock(); error = READ_ONCE(queue.status); if (error != -EINTR) { /* see SEM_BARRIER_2 for purpose/pairing */ smp_acquire__after_ctrl_dep(); + rcu_read_unlock(); goto out; } - rcu_read_lock(); locknum = sem_lock(sma, sops, nsops); if (!ipc_valid_object(&sma->sem_perm)) From 6e90293618ed476d6b11f82ce724efbb9e9a071b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 1 Dec 2022 12:53:41 -0500 Subject: [PATCH 889/963] drm/vmwgfx: Don't use screen objects when SEV is active When SEV is enabled gmr's and mob's are explicitly disabled because the encrypted system memory can not be used by the hypervisor. The driver was disabling GMR's but the presentation code, which depends on GMR's, wasn't honoring it which lead to black screen on hosts with SEV enabled. Make sure screen objects presentation is not used when guest memory regions have been disabled to fix presentation on SEV enabled hosts. Fixes: 3b0d6458c705 ("drm/vmwgfx: Refuse DMA operation when SEV encryption is active") Cc: # v5.7+ Signed-off-by: Zack Rusin Reported-by: Nicholas Hunt Reviewed-by: Martin Krastev Link: https://patchwork.freedesktop.org/patch/msgid/20221201175341.491884-1-zack@kde.org --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index ecd3c2fc978b..9c79873f62f0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -949,6 +949,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i, ret; + /* Screen objects won't work if GMR's aren't available */ + if (!dev_priv->has_gmr) + return -ENOSYS; + if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) { return -ENOSYS; } From e6cfaf34be9fcd1a8285a294e18986bfc41a409c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Dec 2022 11:33:40 -0800 Subject: [PATCH 890/963] proc: avoid integer type confusion in get_proc_long proc_get_long() is passed a size_t, but then assigns it to an 'int' variable for the length. Let's not do that, even if our IO paths are limited to MAX_RW_COUNT (exactly because of these kinds of type errors). So do the proper test in the rigth type. Reported-by: Kyle Zeng Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 188c305aeb8b..8898ddeaaf75 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -342,13 +342,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; From bce9332220bd677d83b19d21502776ad555a0e73 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 5 Dec 2022 12:09:06 -0800 Subject: [PATCH 891/963] proc: proc_skip_spaces() shouldn't think it is working on C strings proc_skip_spaces() seems to think it is working on C strings, and ends up being just a wrapper around skip_spaces() with a really odd calling convention. Instead of basing it on skip_spaces(), it should have looked more like proc_skip_char(), which really is the exact same function (except it skips a particular character, rather than whitespace). So use that as inspiration, odd coding and all. Now the calling convention actually makes sense and works for the intended purpose. Reported-and-tested-by: Kyle Zeng Acked-by: Eric Dumazet Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8898ddeaaf75..c6d9dec11b74 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -267,13 +267,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -520,7 +521,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -547,7 +548,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; @@ -589,7 +590,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -609,7 +610,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1074,7 +1075,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1103,7 +1104,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; From e329e71013c9b5a4535b099208493c7826ee4a64 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 13:44:14 -0800 Subject: [PATCH 892/963] NFC: nci: Bounds check struct nfc_target arrays While running under CONFIG_FORTIFY_SOURCE=y, syzkaller reported: memcpy: detected field-spanning write (size 129) of single field "target->sensf_res" at net/nfc/nci/ntf.c:260 (size 18) This appears to be a legitimate lack of bounds checking in nci_add_new_protocol(). Add the missing checks. Reported-by: syzbot+210e196cef4711b65139@syzkaller.appspotmail.com Link: https://lore.kernel.org/lkml/0000000000001c590f05ee7b3ff4@google.com Fixes: 019c4fbaa790 ("NFC: Add NCI multiple targets support") Signed-off-by: Kees Cook Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20221202214410.never.693-kees@kernel.org Signed-off-by: Jakub Kicinski --- net/nfc/nci/ntf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 282c51051dcc..994a0a1efb58 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, target->sens_res = nfca_poll->sens_res; target->sel_res = nfca_poll->sel_res; target->nfcid1_len = nfca_poll->nfcid1_len; + if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) + return -EPROTO; if (target->nfcid1_len > 0) { memcpy(target->nfcid1, nfca_poll->nfcid1, target->nfcid1_len); @@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params; target->sensb_res_len = nfcb_poll->sensb_res_len; + if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) + return -EPROTO; if (target->sensb_res_len > 0) { memcpy(target->sensb_res, nfcb_poll->sensb_res, target->sensb_res_len); @@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; target->sensf_res_len = nfcf_poll->sensf_res_len; + if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) + return -EPROTO; if (target->sensf_res_len > 0) { memcpy(target->sensf_res, nfcf_poll->sensf_res, target->sensf_res_len); From 6f2d71524bcfdeb1fcbd22a4a92a5b7b161ab224 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Thu, 1 Dec 2022 13:52:34 +0100 Subject: [PATCH 893/963] nvme initialize core quirks before calling nvme_init_subsystem A device might have a core quirk for NVME_QUIRK_IGNORE_DEV_SUBNQN (such as Samsung X5) but it would still give a: "missing or invalid SUBNQN field" warning as core quirks are filled after calling nvme_init_subnqn. Fill ctrl->quirks from struct core_quirks before calling nvme_init_subsystem to fix this. Tested on a Samsung X5. Fixes: ab9e00cc72fa ("nvme: track subsystems") Signed-off-by: Pankaj Raghav Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 69e333922bea..7e3893d06bab 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3095,10 +3095,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (!ctrl->identified) { unsigned int i; - ret = nvme_init_subsystem(ctrl, id); - if (ret) - goto out_free; - /* * Check for quirks. Quirk can depend on firmware version, * so, in principle, the set of quirks present can change @@ -3111,6 +3107,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) if (quirk_matches(id, &core_quirks[i])) ctrl->quirks |= core_quirks[i].quirks; } + + ret = nvme_init_subsystem(ctrl, id); + if (ret) + goto out_free; } memcpy(ctrl->subsys->firmware_rev, id->fr, sizeof(ctrl->subsys->firmware_rev)); From 63ff545af73f759d1bd04198af8ed8577fb739fc Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Mon, 5 Dec 2022 14:19:56 +0800 Subject: [PATCH 894/963] gpio/rockchip: fix refcount leak in rockchip_gpiolib_register() The node returned by of_get_parent() with refcount incremented, of_node_put() needs be called when finish using it. So add it in the end of of_pinctrl_get(). Fixes: 936ee2675eee ("gpio/rockchip: add driver for rockchip gpio") Signed-off-by: Wang Yufen Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-rockchip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 870910bb9dd3..200e43a6f4b4 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -610,6 +610,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) return -ENODATA; pctldev = of_pinctrl_get(pctlnp); + of_node_put(pctlnp); if (!pctldev) return -ENODEV; From 61d4f140943c47c1386ed89f7260e00418dfad9d Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 3 Dec 2022 00:17:39 +0800 Subject: [PATCH 895/963] net: stmmac: fix "snps,axi-config" node property parsing In dt-binding snps,dwmac.yaml, some properties under "snps,axi-config" node are named without "axi_" prefix, but the driver expects the prefix. Since the dt-binding has been there for a long time, we'd better make driver match the binding for compatibility. Fixes: afea03656add ("stmmac: rework DMA bus setting and introduce new platform AXI structure") Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221202161739.2203-1-jszhang@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 50f6b4a14be4..eb6d9cd8e93f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); - axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); - axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); - axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); + axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); + axi->axi_fb = of_property_read_bool(np, "snps,fb"); + axi->axi_mb = of_property_read_bool(np, "snps,mb"); + axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; From 18010ff776fa42340efc428b3ea6d19b3e7c7b21 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 2 Dec 2022 11:43:10 -0800 Subject: [PATCH 896/963] net: mana: Fix race on per-CQ variable napi work_done After calling napi_complete_done(), the NAPIF_STATE_SCHED bit may be cleared, and another CPU can start napi thread and access per-CQ variable, cq->work_done. If the other thread (for example, from busy_poll) sets it to a value >= budget, this thread will continue to run when it should stop, and cause memory corruption and panic. To fix this issue, save the per-CQ work_done variable in a local variable before napi_complete_done(), so it won't be corrupted by a possible concurrent thread after napi_complete_done(). Also, add a flag bit to advertise to the NIC firmware: the NAPI work_done variable race is fixed, so the driver is able to reliably support features like busy_poll. Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Signed-off-by: Haiyang Zhang Link: https://lore.kernel.org/r/1670010190-28595-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/gdma.h | 9 ++++++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 16 +++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/gdma.h b/drivers/net/ethernet/microsoft/mana/gdma.h index 4a6efe6ada08..65c24ee49efd 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma.h +++ b/drivers/net/ethernet/microsoft/mana/gdma.h @@ -498,7 +498,14 @@ enum { #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) -#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT +/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed, + * so the driver is able to reliably support features like busy_poll. + */ +#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) + +#define GDMA_DRV_CAP_FLAGS1 \ + (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ + GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 9259a74eca40..27a0f3af8aab 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1303,10 +1303,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) xdp_do_flush(); } -static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; u8 arm_bit; + int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1315,26 +1316,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) else mana_poll_tx_cq(cq); - if (cq->work_done < cq->budget && - napi_complete_done(&cq->napi, cq->work_done)) { + w = cq->work_done; + + if (w < cq->budget && + napi_complete_done(&cq->napi, w)) { arm_bit = SET_ARM_BIT; } else { arm_bit = 0; } mana_gd_ring_cq(gdma_queue, arm_bit); + + return w; } static int mana_poll(struct napi_struct *napi, int budget) { struct mana_cq *cq = container_of(napi, struct mana_cq, napi); + int w; cq->work_done = 0; cq->budget = budget; - mana_cq_handler(cq, cq->gdma_cq); + w = mana_cq_handler(cq, cq->gdma_cq); - return min(cq->work_done, budget); + return min(w, budget); } static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) From 23353efc26e98b61b925274ecbb8f0610f69a8aa Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 3 Dec 2022 10:09:03 +0800 Subject: [PATCH 897/963] net: wwan: iosm: fix memory leak in ipc_mux_init() When failed to alloc ipc_mux->ul_adb.pp_qlt in ipc_mux_init(), ipc_mux is not released. Fixes: 1f52d7b62285 ("net: wwan: iosm: Enable M.2 7360 WWAN card support") Signed-off-by: Zhengchao Shao Reviewed-by: M Chetan Kumar Link: https://lore.kernel.org/r/20221203020903.383235-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/wwan/iosm/iosm_ipc_mux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c index 9c7a9a2a1f25..fc928b298a98 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c @@ -332,6 +332,7 @@ struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg, if (!ipc_mux->ul_adb.pp_qlt[i]) { for (j = i - 1; j >= 0; j--) kfree(ipc_mux->ul_adb.pp_qlt[j]); + kfree(ipc_mux); return NULL; } } From ee496694b9eea651ae1aa4c4667d886cdf74aa3b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sat, 3 Dec 2022 11:28:58 +0800 Subject: [PATCH 898/963] ip_gre: do not report erspan version on GRE interface Although the type I ERSPAN is based on the barebones IP + GRE encapsulation and no extra ERSPAN header. Report erspan version on GRE interface looks unreasonable. Fix this by separating the erspan and gre fill info. IPv6 GRE does not have this info as IPv6 only supports erspan version 1 and 2. Reported-by: Jianlin Shi Fixes: f989d546a2d5 ("erspan: Add type I version 0 support.") Signed-off-by: Hangbin Liu Acked-by: William Tu Link: https://lore.kernel.org/r/20221203032858.3130339-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- net/ipv4/ip_gre.c | 48 ++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f866d6282b2b..cae9f1a4e059 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if (t->erspan_ver <= 2) { - if (t->erspan_ver != 0 && !t->collect_md) - o_flags |= TUNNEL_KEY; - - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) - goto nla_put_failure; - - if (t->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) - goto nla_put_failure; - } else if (t->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) - goto nla_put_failure; - } - } - if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || @@ -1550,6 +1532,34 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; } +static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + if (t->erspan_ver <= 2) { + if (t->erspan_ver != 0 && !t->collect_md) + t->parms.o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + } else if (t->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + } + } + + return ipgre_fill_info(skb, dev); + +nla_put_failure: + return -EMSGSIZE; +} + static void erspan_setup(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, - .fill_info = ipgre_fill_info, + .fill_info = erspan_fill_info, .get_link_net = ip_tunnel_get_link_net, }; From 7b8232bdb1789a257de3129a9bb08c69b93a17db Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Sat, 3 Dec 2022 15:02:59 +0800 Subject: [PATCH 899/963] net: microchip: sparx5: Fix missing destroy_workqueue of mact_queue The mchp_sparx5_probe() won't destroy workqueue created by create_singlethread_workqueue() in sparx5_start() when later inits failed. Add destroy_workqueue in the cleanup_ports case, also add it in mchp_sparx5_remove() Fixes: b37a1bae742f ("net: sparx5: add mactable support") Signed-off-by: Qiheng Lin Link: https://lore.kernel.org/r/20221203070259.19560-1-linqiheng@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index eeac04b84638..b6bbb3c9bd7a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -887,6 +887,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_ports: sparx5_cleanup_ports(sparx5); + if (sparx5->mact_queue) + destroy_workqueue(sparx5->mact_queue); cleanup_config: kfree(configs); cleanup_pnode: @@ -911,6 +913,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) sparx5_cleanup_ports(sparx5); /* Unregister netdevs */ sparx5_unregister_notifier_blocks(sparx5); + destroy_workqueue(sparx5->mact_queue); return 0; } From 5a5a3e564de6a8db987410c5c2f4748d50ea82b8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 3 Dec 2022 17:29:41 +0800 Subject: [PATCH 900/963] ravb: Fix potential use-after-free in ravb_rx_gbeth() The skb is delivered to napi_gro_receive() which may free it, after calling this, dereferencing skb may trigger use-after-free. Fixes: 1c59eb678cbd ("ravb: Fillup ravb_rx_gbeth() stub") Signed-off-by: YueHaibing Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20221203092941.10880-1-yuehaibing@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/renesas/ravb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6bc923326268..33f723a9f471 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); stats->rx_packets++; - stats->rx_bytes += priv->rx_1st_skb->len; + stats->rx_bytes += pkt_len; break; } } From 42330a32933fb42180c52022804dcf09f47a2f99 Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Sat, 3 Dec 2022 09:41:25 +0000 Subject: [PATCH 901/963] net: thunderx: Fix missing destroy_workqueue of nicvf_rx_mode_wq The nicvf_probe() won't destroy workqueue when register_netdev() failed. Add destroy_workqueue err handle case to fix this issue. Fixes: 2ecbe4f4a027 ("net: thunderx: replace global nicvf_rx_mode_wq work queue for all VFs to private for each of them.") Signed-off-by: Yongqiang Liu Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20221203094125.602812-1-liuyongqiang13@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 98f3dc460ca7..f2f95493ec89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_unregister_interrupts; + goto err_destroy_workqueue; } nic->msg_enable = debug; @@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_destroy_workqueue: + destroy_workqueue(nic->nicvf_rx_mode_wq); err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: From 4640177049549de1a43e9bc49265f0cdfce08cfd Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 3 Dec 2022 17:42:39 +0800 Subject: [PATCH 902/963] net: hisilicon: Fix potential use-after-free in hisi_femac_rx() The skb is delivered to napi_gro_receive() which may free it, after calling this, dereferencing skb may trigger use-after-free. Fixes: 542ae60af24f ("net: hisilicon: Add Fast Ethernet MAC driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20221203094240.1240211-1-liujian56@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hisi_femac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 93846bace028..ce2571c16e43 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = (pos + 1) % rxq->num; if (rx_pkts_num >= limit) From cb37617687f2bfa5b675df7779f869147c9002bd Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Sat, 3 Dec 2022 15:34:41 +0800 Subject: [PATCH 903/963] net: mdio: fix unbalanced fwnode reference count in mdio_device_release() There is warning report about of_node refcount leak while probing mdio device: OF: ERROR: memory leak, expected refcount 1 instead of 2, of_node_get()/of_node_put() unbalanced - destroy cset entry: attach overlay node /spi/soc@0/mdio@710700c0/ethernet@4 In of_mdiobus_register_device(), we increase fwnode refcount by fwnode_handle_get() before associating the of_node with mdio device, but it has never been decreased in normal path. Since that, in mdio_device_release(), it needs to call fwnode_handle_put() in addition instead of calling kfree() directly. After above, just calling mdio_device_free() in the error handle path of of_mdiobus_register_device() is enough to keep the refcount balanced. Fixes: a9049e0c513c ("mdio: Add support for mdio drivers.") Signed-off-by: Zeng Heng Reviewed-by: Yang Yingliang Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20221203073441.3885317-1-zengheng4@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/mdio/of_mdio.c | 3 ++- drivers/net/phy/mdio_device.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 796e9c7857d0..510822d6d0d9 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, /* All data is now stored in the mdiodev struct; register it. */ rc = mdio_device_register(mdiodev); if (rc) { + device_set_node(&mdiodev->dev, NULL); + fwnode_handle_put(fwnode); mdio_device_free(mdiodev); - of_node_put(child); return rc; } diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 250742ffdfd9..044828d081d2 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -21,6 +21,7 @@ #include #include #include +#include void mdio_device_free(struct mdio_device *mdiodev) { @@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free); static void mdio_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_mdio_device(dev)); } From 433c07a13f59856e4585e89e86b7d4cc59348fab Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Sat, 3 Dec 2022 17:42:40 +0800 Subject: [PATCH 904/963] net: hisilicon: Fix potential use-after-free in hix5hd2_rx() The skb is delivered to napi_gro_receive() which may free it, after calling this, dereferencing skb may trigger use-after-free. Fixes: 57c5bc9ad7d7 ("net: hisilicon: add hix5hd2 mac driver") Signed-off-by: Liu Jian Link: https://lore.kernel.org/r/20221203094240.1240211-2-liujian56@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index ffcf797dfa90..f867e9531117 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = dma_ring_incr(pos, RX_DESC_NUM); } From 743117a997bbd4840e827295c07e59bcd7f7caa3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 3 Dec 2022 17:46:35 +0800 Subject: [PATCH 905/963] tipc: Fix potential OOB in tipc_link_proto_rcv() Fix the potential risk of OOB if skb_linearize() fails in tipc_link_proto_rcv(). Fixes: 5cbb28a4bf65 ("tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers") Signed-off-by: YueHaibing Link: https://lore.kernel.org/r/20221203094635.29024-1-yuehaibing@huawei.com Signed-off-by: Paolo Abeni --- net/tipc/link.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index e260c0d557f5..b3ce24823f50 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); From ad7f402ae4f466647c3a669b8a6f3e5d4271c84a Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 22 Nov 2022 09:16:59 +0000 Subject: [PATCH 906/963] xen/netback: Ensure protocol headers don't fall in the non-linear area In some cases, the frontend may send a packet where the protocol headers are spread across multiple slots. This would result in netback creating an skb where the protocol headers spill over into the non-linear area. Some drivers and NICs don't handle this properly resulting in an interface reset or worse. This issue was introduced by the removal of an unconditional skb pull in the tx path to improve performance. Fix this without reintroducing the pull by setting up grant copy ops for as many slots as needed to reach the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle multiple copy operations per skb. This is XSA-423 / CVE-2022-3643. Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path") Signed-off-by: Ross Lagerwall Reviewed-by: Paul Durrant Signed-off-by: Juergen Gross --- drivers/net/xen-netback/netback.c | 223 ++++++++++++++++-------------- 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3d2081bbbc86..054ac0e897f6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -332,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -370,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; - nr_slots = shinfo->nr_frags; + nr_slots = shinfo->nr_frags + 1; - /* Skip first skb fragment if it is on same page as header fragment. */ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + copy_count(skb) = 0; - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; + + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; + + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped. + */ + txp->offset += amount; + txp->size -= amount; + } + } + + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -415,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -451,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -462,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); int i, err; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -526,14 +605,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -803,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -885,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? + XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -912,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -925,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ @@ -988,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1114,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1135,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. */ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1332,7 +1355,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) From 74e7e1efdad45580cc3839f2a155174cf158f9b5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 6 Dec 2022 08:54:24 +0100 Subject: [PATCH 907/963] xen/netback: don't call kfree_skb() with interrupts disabled It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So remove kfree_skb() from the spin_lock_irqsave() section and use the already existing "drop" label in xenvif_start_xmit() for dropping the SKB. At the same time replace the dev_kfree_skb() call there with a call of dev_kfree_skb_any(), as xenvif_start_xmit() can be called with disabled interrupts. This is XSA-424 / CVE-2022-42328 / CVE-2022-42329. Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") Reported-by: Yang Yingliang Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netback/rx.c | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 1545cbee77a4..3dbfc8a6924e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 650fa180220f..f3f2c07423a6 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -254,14 +254,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 932762177110..0ba754ebc5ba 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) From b7d9aae404841d9999b7476170867ae441e948d2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Dec 2022 10:34:03 +0000 Subject: [PATCH 908/963] Revert "arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()" This reverts commit c44094eee32f32f175aadc0efcac449d99b1bbf7. Although the semantics of the DMA API require only a clean operation here, it turns out that the Qualcomm 'qcom_q6v5_mss' remoteproc driver (ab)uses the DMA API for transferring the modem firmware to the secure world via calls to Trustzone [1]. Once the firmware buffer has changed hands, _any_ access from the non-secure side (i.e. Linux) will be detected on the bus and result in a full system reset [2]. Although this is possible even with this revert in place (due to speculative reads via the cacheable linear alias of memory), anecdotally the problem occurs considerably more frequently when the lines have not been invalidated, assumedly due to some micro-architectural interactions with the cache hierarchy. Revert the offending change for now, along with a comment, so that the Qualcomm developers have time to fix the driver [3] to use a firmware buffer which does not have a cacheable alias in the linear map. Link: https://lore.kernel.org/r/20221114110329.68413-1-manivannan.sadhasivam@linaro.org [1] Link: https://lore.kernel.org/r/CAMi1Hd3H2k1J8hJ6e-Miy5+nVDNzv6qQ3nN-9929B0GbHJkXEg@mail.gmail.com/ [2] Link: https://lore.kernel.org/r/20221206092152.GD15486@thinkpad [2] Reported-by: Amit Pundir Reported-by: Manivannan Sadhasivam Cc: Thorsten Leemhuis Cc: Sibi Sankar Signed-off-by: Will Deacon Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20221206103403.646-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..5240f6acad64 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_poc(start, start + size); + /* + * The architecture only requires a clean to the PoC here in order to + * meet the requirements of the DMA API. However, some vendors (i.e. + * Qualcomm) abuse the DMA API for transferring buffers from the + * non-secure to the secure world, resetting the system if a non-secure + * access shows up after the buffer has been transferred: + * + * https://lore.kernel.org/r/20221114110329.68413-1-manivannan.sadhasivam@linaro.org + * + * Using clean+invalidate appears to make this issue less likely, but + * the drivers themselves still need fixing as the CPU could issue a + * speculative read from the buffer via the linear mapping irrespective + * of the cache maintenance we use. Once the drivers are fixed, we can + * relax this to a clean operation. + */ + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA From 449d8c616f8a6977d85d145ce14362fdbcbf7c06 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 6 Dec 2022 21:14:55 +0000 Subject: [PATCH 909/963] ANDROID: remove leftover includes from revert The android_vh_kfree_skb tracehook was reverted in commit c1e292ea7c7c ("ANDROID: remove the android_vh_kfree_skb tracehook"), so we don't need the includes for the trace/hooks/net.h header either as this was added in the original patch for the tracehook. This aligns with upstream. Bug: 163716381 Signed-off-by: Carlos Llamas Change-Id: Iebb3d82bb56c6cbbe5abef3fec6e926bd2335d50 --- net/core/skbuff.c | 1 - net/ipv4/tcp_input.c | 1 - 2 files changed, 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5293ff276e7a..88fa40571d0c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,7 +79,6 @@ #include #include #include -#include #include "dev.h" #include "sock_destructor.h" diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f5107e37ead2..0640453fce54 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -80,7 +80,6 @@ #include #include #include -#include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; From d95d140e83634e7914277862dee841564d02879f Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 6 Dec 2022 09:34:16 +0100 Subject: [PATCH 910/963] ata: libahci_platform: ahci_platform_find_clk: oops, NULL pointer When booting a arm 32-bit kernel with config CONFIG_AHCI_DWC enabled on a am57xx-evm board. This happens when the clock references are unnamed in DT, the strcmp() produces a NULL pointer dereference, see the following oops, NULL pointer dereference: [ 4.673950] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 4.682098] [00000000] *pgd=00000000 [ 4.685699] Internal error: Oops: 5 [#1] SMP ARM [ 4.690338] Modules linked in: [ 4.693420] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc7 #1 [ 4.699615] Hardware name: Generic DRA74X (Flattened Device Tree) [ 4.705749] PC is at strcmp+0x0/0x34 [ 4.709350] LR is at ahci_platform_find_clk+0x3c/0x5c [ 4.714416] pc : [] lr : [] psr: 20000013 [ 4.720703] sp : f000dda8 ip : 00000001 fp : c29b1840 [ 4.725952] r10: 00000020 r9 : c1b23380 r8 : c1b23368 [ 4.731201] r7 : c1ab4cc4 r6 : 00000001 r5 : c3c66040 r4 : 00000000 [ 4.737762] r3 : 00000080 r2 : 00000080 r1 : c1ab4cc4 r0 : 00000000 [...] [ 4.998870] strcmp from ahci_platform_find_clk+0x3c/0x5c [ 5.004302] ahci_platform_find_clk from ahci_dwc_probe+0x1f0/0x54c [ 5.010589] ahci_dwc_probe from platform_probe+0x64/0xc0 [ 5.016021] platform_probe from really_probe+0xe8/0x41c [ 5.021362] really_probe from __driver_probe_device+0xa4/0x204 [ 5.027313] __driver_probe_device from driver_probe_device+0x38/0xc8 [ 5.033782] driver_probe_device from __driver_attach+0xb4/0x1ec [ 5.039825] __driver_attach from bus_for_each_dev+0x78/0xb8 [ 5.045532] bus_for_each_dev from bus_add_driver+0x17c/0x220 [ 5.051300] bus_add_driver from driver_register+0x90/0x124 [ 5.056915] driver_register from do_one_initcall+0x48/0x1e8 [ 5.062591] do_one_initcall from kernel_init_freeable+0x1cc/0x234 [ 5.068817] kernel_init_freeable from kernel_init+0x20/0x13c [ 5.074584] kernel_init from ret_from_fork+0x14/0x2c [ 5.079681] Exception stack(0xf000dfb0 to 0xf000dff8) [ 5.084747] dfa0: 00000000 00000000 00000000 00000000 [ 5.092956] dfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 5.101165] dfe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 5.107818] Code: e5e32001 e3520000 1afffffb e12fff1e (e4d03001) [ 5.114013] ---[ end trace 0000000000000000 ]--- Add an extra check in the if-statement if hpriv-clks[i].id. Fixes: 6ce73f3a6fc0 ("ata: libahci_platform: Add function returning a clock-handle by id") Suggested-by: Arnd Bergmann Signed-off-by: Anders Roxell Reviewed-by: Serge Semin Signed-off-by: Damien Le Moal --- drivers/ata/libahci_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index ddf17e2d266c..b9e336bacf17 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -109,7 +109,7 @@ struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, const char *con int i; for (i = 0; i < hpriv->n_clks; i++) { - if (!strcmp(hpriv->clks[i].id, con_id)) + if (hpriv->clks[i].id && !strcmp(hpriv->clks[i].id, con_id)) return hpriv->clks[i].clk; } From 1f154f3b56a1a172833eedf77b72745acc8d9259 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 6 Dec 2022 11:20:55 +0800 Subject: [PATCH 911/963] bonding: get correct NA dest address In commit 4d633d1b468b ("bonding: fix ICMPv6 header handling when receiving IPv6 messages"), there is a copy/paste issue for NA daddr. I found that in my testing and fixed it in my local branch. But I forgot to re-format the patch and sent the wrong mail. Fix it by reading the correct dest address. Fixes: 4d633d1b468b ("bonding: fix ICMPv6 header handling when receiving IPv6 messages") Signed-off-by: Hangbin Liu Reviewed-by: Eric Dumazet Reviewed-by: Jiri Pirko Acked-by: Jonathan Toppins Link: https://lore.kernel.org/r/20221206032055.7517-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f298b9b3eb77..b9a882f182d2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3247,7 +3247,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, goto out; saddr = &combined->ip6.saddr; - daddr = &combined->ip6.saddr; + daddr = &combined->ip6.daddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), From 7e6303567ce3ca506e4a2704e4baa86f1d8bde02 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 5 Dec 2022 21:46:04 +0100 Subject: [PATCH 912/963] net: fec: properly guard irq coalesce setup Prior to the Fixes: commit, the initialization code went through the same fec_enet_set_coalesce() function as used by ethtool, and that function correctly checks whether the current variant has support for irq coalescing. Now that the initialization code instead calls fec_enet_itr_coal_set() directly, that call needs to be guarded by a check for the FEC_QUIRK_HAS_COALESCE bit. Fixes: df727d4547de (net: fec: don't reset irq coalesce settings to defaults on "ip link up") Reported-by: Greg Ungerer Signed-off-by: Rasmus Villemoes Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221205204604.869853-1-linux@rasmusvillemoes.dk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2ca2b61b451f..23e1a94b9ce4 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1220,7 +1220,8 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_set(ndev); + if (fep->quirks & FEC_QUIRK_HAS_COALESCE) + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) From f96a3d74554df537b6db5c99c27c80e7afadc8d1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 4 Dec 2022 09:50:44 +0200 Subject: [PATCH 913/963] ipv4: Fix incorrect route flushing when source address is deleted Cited commit added the table ID to the FIB info structure, but did not prevent structures with different table IDs from being consolidated. This can lead to routes being flushed from a VRF when an address is deleted from a different VRF. Fix by taking the table ID into account when looking for a matching FIB info. This is already done for FIB info structures backed by a nexthop object in fib_find_info_nh(). Add test cases that fail before the fix: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [FAIL] TEST: Route in default VRF not removed [ OK ] RTNETLINK answers: File exists TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [FAIL] Tests passed: 6 Tests failed: 2 And pass after: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Tests passed: 8 Tests failed: 0 Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/fib_semantics.c | 1 + tools/testing/selftests/net/fib_tests.sh | 27 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 19a662003eef..ce9ff3c62e84 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && nfi->fib_type == fi->fib_type && + nfi->fib_tb_id == fi->fib_tb_id && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2271a8727f62..11c89148b19f 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1711,13 +1711,19 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1735,6 +1741,27 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 0 "Route in VRF is not removed by address delete" + $IP li del dummy1 $IP li del dummy2 cleanup From c0d999348e01df03e0a7f550351f3907fabbf611 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 4 Dec 2022 09:50:45 +0200 Subject: [PATCH 914/963] ipv4: Fix incorrect route flushing when table ID 0 is used Cited commit added the table ID to the FIB info structure, but did not properly initialize it when table ID 0 is used. This can lead to a route in the default VRF with a preferred source address not being flushed when the address is deleted. Consider the following example: # ip address add dev dummy1 192.0.2.1/28 # ip address add dev dummy1 192.0.2.17/28 # ip route add 198.51.100.0/24 via 192.0.2.2 src 192.0.2.17 metric 100 # ip route add table 0 198.51.100.0/24 via 192.0.2.2 src 192.0.2.17 metric 200 # ip route show 198.51.100.0/24 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 100 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 200 Both routes are installed in the default VRF, but they are using two different FIB info structures. One with a metric of 100 and table ID of 254 (main) and one with a metric of 200 and table ID of 0. Therefore, when the preferred source address is deleted from the default VRF, the second route is not flushed: # ip address del dev dummy1 192.0.2.17/28 # ip route show 198.51.100.0/24 198.51.100.0/24 via 192.0.2.2 dev dummy1 src 192.0.2.17 metric 200 Fix by storing a table ID of 254 instead of 0 in the route configuration structure. Add a test case that fails before the fix: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Table ID 0 TEST: Route removed in default VRF when source address deleted [FAIL] Tests passed: 8 Tests failed: 1 And passes after: # ./fib_tests.sh -t ipv4_del_addr IPv4 delete address route tests Regular FIB info TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Identical FIB info with different table ID TEST: Route removed from VRF when source address deleted [ OK ] TEST: Route in default VRF not removed [ OK ] TEST: Route removed in default VRF when source address deleted [ OK ] TEST: Route in VRF is not removed by address delete [ OK ] Table ID 0 TEST: Route removed in default VRF when source address deleted [ OK ] Tests passed: 9 Tests failed: 0 Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs") Reported-by: Donald Sharp Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 3 +++ tools/testing/selftests/net/fib_tests.sh | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f361d3d56be2..b5736ef16ed2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, return -EINVAL; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 11c89148b19f..5637b5dadabd 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1712,11 +1712,13 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e @@ -1762,6 +1764,14 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.106.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup From 78a9ea43fc1a7c06a420b132d2d47cbf4344a5df Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 5 Dec 2022 09:21:32 +0800 Subject: [PATCH 915/963] net: dsa: sja1105: fix memory leak in sja1105_setup_devlink_regions() When dsa_devlink_region_create failed in sja1105_setup_devlink_regions(), priv->regions is not released. Fixes: bf425b82059e ("net: dsa: sja1105: expose static config as devlink region") Signed-off-by: Zhengchao Shao Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221205012132.2110979-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_devlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 10c6fea1227f..bdbbff2a7909 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) if (IS_ERR(region)) { while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); + + kfree(priv->regions); return PTR_ERR(region); } From 0acc442309a0a1b01bcdaa135e56e6398a49439c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Dec 2022 21:12:59 +0100 Subject: [PATCH 916/963] can: af_can: fix NULL pointer dereference in can_rcv_filter Analogue to commit 8aa59e355949 ("can: af_can: fix NULL pointer dereference in can_rx_register()") we need to check for a missing initialization of ml_priv in the receive path of CAN frames. Since commit 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") the check for dev->type to be ARPHRD_CAN is not sufficient anymore since bonding or tun netdevices claim to be CAN devices but do not initialize ml_priv accordingly. Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Reported-by: syzbot+2d7f58292cb5b29eb5ad@syzkaller.appspotmail.com Reported-by: Wei Chen Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221206201259.3028-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 27dcdcc0b808..c69168f11e44 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -677,7 +677,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -692,7 +692,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -707,7 +707,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", dev->type, skb->len); From fb855e9f3b6b42c72af3f1eb0b288998fe0d5ebb Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 1 Dec 2022 08:34:26 +0100 Subject: [PATCH 917/963] can: slcan: fix freed work crash The LTP test pty03 is causing a crash in slcan: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 0 PID: 348 Comm: kworker/0:3 Not tainted 6.0.8-1-default #1 openSUSE Tumbleweed 9d20364b934f5aab0a9bdf84e8f45cfdfae39dab Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014 Workqueue: 0x0 (events) RIP: 0010:process_one_work (/home/rich/kernel/linux/kernel/workqueue.c:706 /home/rich/kernel/linux/kernel/workqueue.c:2185) Code: 49 89 ff 41 56 41 55 41 54 55 53 48 89 f3 48 83 ec 10 48 8b 06 48 8b 6f 48 49 89 c4 45 30 e4 a8 04 b8 00 00 00 00 4c 0f 44 e0 <49> 8b 44 24 08 44 8b a8 00 01 00 00 41 83 e5 20 f6 45 10 04 75 0e RSP: 0018:ffffaf7b40f47e98 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff9d644e1b8b48 RCX: ffff9d649e439968 RDX: 00000000ffff8455 RSI: ffff9d644e1b8b48 RDI: ffff9d64764aa6c0 RBP: ffff9d649e4335c0 R08: 0000000000000c00 R09: ffff9d64764aa734 R10: 0000000000000007 R11: 0000000000000001 R12: 0000000000000000 R13: ffff9d649e4335e8 R14: ffff9d64490da780 R15: ffff9d64764aa6c0 FS: 0000000000000000(0000) GS:ffff9d649e400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000036424000 CR4: 00000000000006f0 Call Trace: worker_thread (/home/rich/kernel/linux/kernel/workqueue.c:2436) kthread (/home/rich/kernel/linux/kernel/kthread.c:376) ret_from_fork (/home/rich/kernel/linux/arch/x86/entry/entry_64.S:312) Apparently, the slcan's tx_work is freed while being scheduled. While slcan_netdev_close() (netdev side) calls flush_work(&sl->tx_work), slcan_close() (tty side) does not. So when the netdev is never set UP, but the tty is stuffed with bytes and forced to wakeup write, the work is scheduled, but never flushed. So add an additional flush_work() to slcan_close() to be sure the work is flushed under all circumstances. The Fixes commit below moved flush_work() from slcan_close() to slcan_netdev_close(). What was the rationale behind it? Maybe we can drop the one in slcan_netdev_close()? I see the same pattern in can327. So it perhaps needs the very same fix. Fixes: cfcb4465e992 ("can: slcan: remove legacy infrastructure") Link: https://bugzilla.suse.com/show_bug.cgi?id=1205597 Reported-by: Richard Palethorpe Tested-by: Petr Vorel Cc: Dario Binacchi Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Cc: Max Staudt Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Max Staudt Link: https://lore.kernel.org/all/20221201073426.17328-1-jirislaby@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/slcan/slcan-core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index fbb34139daa1..f4db77007c13 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -864,12 +864,14 @@ static void slcan_close(struct tty_struct *tty) { struct slcan *sl = (struct slcan *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set sl->tty = NULL after this. - */ unregister_candev(sl->dev); + /* + * The netdev needn't be UP (so .ndo_stop() is not called). Hence make + * sure this is not running before freeing it up. + */ + flush_work(&sl->tx_work); + /* Mark channel as dead */ spin_lock_bh(&sl->lock); tty->disc_data = NULL; From f4a4d121ebecaa6f396f21745ce97de014281ccc Mon Sep 17 00:00:00 2001 From: Max Staudt Date: Sat, 3 Dec 2022 01:01:48 +0900 Subject: [PATCH 918/963] can: can327: flush TX_work on ldisc .close() Additionally, remove it from .ndo_stop(). This ensures that the worker is not called after being freed, and that the UART TX queue remains active to send final commands when the netdev is stopped. Thanks to Jiri Slaby for finding this in slcan: https://lore.kernel.org/linux-can/20221201073426.17328-1-jirislaby@kernel.org/ A variant of this patch for slcan, with the flush in .ndo_stop() still present, has been tested successfully on physical hardware: https://bugzilla.suse.com/show_bug.cgi?id=1205597 Fixes: 43da2f07622f ("can: can327: CAN/ldisc driver for ELM327 based OBD-II adapters") Cc: "Jiri Slaby (SUSE)" Cc: Max Staudt Cc: Wolfgang Grandegger Cc: Marc Kleine-Budde Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Max Staudt Link: https://lore.kernel.org/all/20221202160148.282564-1-max@enpas.org Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/can327.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index ed3d0b8989a0..dc7192ecb001 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -796,9 +796,9 @@ static int can327_netdev_close(struct net_device *dev) netif_stop_queue(dev); - /* Give UART one final chance to flush. */ - clear_bit(TTY_DO_WRITE_WAKEUP, &elm->tty->flags); - flush_work(&elm->tx_work); + /* We don't flush the UART TX queue here, as we want final stop + * commands (like the above dummy char) to be flushed out. + */ can_rx_offload_disable(&elm->offload); elm->can.state = CAN_STATE_STOPPED; @@ -1069,12 +1069,15 @@ static void can327_ldisc_close(struct tty_struct *tty) { struct can327 *elm = (struct can327 *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set elm->tty = NULL after this. - */ + /* unregister_netdev() calls .ndo_stop() so we don't have to. */ unregister_candev(elm->dev); + /* Give UART one final chance to flush. + * No need to clear TTY_DO_WRITE_WAKEUP since .write_wakeup() is + * serialised against .close() and will not be called once we return. + */ + flush_work(&elm->tx_work); + /* Mark channel as dead */ spin_lock_bh(&elm->lock); tty->disc_data = NULL; From 918ee4911f7a41fb4505dff877c1d7f9f64eb43e Mon Sep 17 00:00:00 2001 From: Frank Jungclaus Date: Wed, 30 Nov 2022 21:22:42 +0100 Subject: [PATCH 919/963] can: esd_usb: Allow REC and TEC to return to zero We don't get any further EVENT from an esd CAN USB device for changes on REC or TEC while those counters converge to 0 (with ecc == 0). So when handling the "Back to Error Active"-event force txerr = rxerr = 0, otherwise the berr-counters might stay on values like 95 forever. Also, to make life easier during the ongoing development a netdev_dbg() has been introduced to allow dumping error events send by an esd CAN USB device. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Signed-off-by: Frank Jungclaus Link: https://lore.kernel.org/all/20221130202242.3998219-2-frank.jungclaus@esd.eu Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/esd_usb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 81b88e9e5bdc..42323f5e6f3a 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -234,6 +234,10 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, u8 rxerr = msg->msg.rx.data[2]; u8 txerr = msg->msg.rx.data[3]; + netdev_dbg(priv->netdev, + "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", + msg->msg.rx.dlc, state, ecc, rxerr, txerr); + skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { stats->rx_dropped++; @@ -260,6 +264,8 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, break; default: priv->can.state = CAN_STATE_ERROR_ACTIVE; + txerr = 0; + rxerr = 0; break; } } else { From 88956177db179e4eba7cd590971961857d1565b8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 3 Dec 2022 18:37:21 -0500 Subject: [PATCH 920/963] tipc: call tipc_lxc_xmit without holding node_read_lock When sending packets between nodes in netns, it calls tipc_lxc_xmit() for peer node to receive the packets where tipc_sk_mcast_rcv()/tipc_sk_rcv() might be called, and it's pretty much like in tipc_rcv(). Currently the local 'node rw lock' is held during calling tipc_lxc_xmit() to protect the peer_net not being freed by another thread. However, when receiving these packets, tipc_node_add_conn() might be called where the peer 'node rw lock' is acquired. Then a dead lock warning is triggered by lockdep detector, although it is not a real dead lock: WARNING: possible recursive locking detected -------------------------------------------- conn_server/1086 is trying to acquire lock: ffff8880065cb020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_add_conn.cold.76+0xaa/0x211 [tipc] but task is already holding lock: ffff8880065cd020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_xmit+0x285/0xb30 [tipc] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&n->lock#2); lock(&n->lock#2); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by conn_server/1086: #0: ffff8880036d1e40 (sk_lock-AF_TIPC){+.+.}-{0:0}, \ at: tipc_accept+0x9c0/0x10b0 [tipc] #1: ffff8880036d5f80 (sk_lock-AF_TIPC/1){+.+.}-{0:0}, \ at: tipc_accept+0x363/0x10b0 [tipc] #2: ffff8880065cd020 (&n->lock#2){++--}-{2:2}, \ at: tipc_node_xmit+0x285/0xb30 [tipc] #3: ffff888012e13370 (slock-AF_TIPC){+...}-{2:2}, \ at: tipc_sk_rcv+0x2da/0x1b40 [tipc] Call Trace: dump_stack_lvl+0x44/0x5b __lock_acquire.cold.77+0x1f2/0x3d7 lock_acquire+0x1d2/0x610 _raw_write_lock_bh+0x38/0x80 tipc_node_add_conn.cold.76+0xaa/0x211 [tipc] tipc_sk_finish_conn+0x21e/0x640 [tipc] tipc_sk_filter_rcv+0x147b/0x3030 [tipc] tipc_sk_rcv+0xbb4/0x1b40 [tipc] tipc_lxc_xmit+0x225/0x26b [tipc] tipc_node_xmit.cold.82+0x4a/0x102 [tipc] __tipc_sendstream+0x879/0xff0 [tipc] tipc_accept+0x966/0x10b0 [tipc] do_accept+0x37d/0x590 This patch avoids this warning by not holding the 'node rw lock' before calling tipc_lxc_xmit(). As to protect the 'peer_net', rcu_read_lock() should be enough, as in cleanup_net() when freeing the netns, it calls synchronize_rcu() before the free is continued. Also since tipc_lxc_xmit() is like the RX path in tipc_rcv(), it makes sense to call it under rcu_read_lock(). Note that the right lock order must be: rcu_read_lock(); tipc_node_read_lock(n); tipc_node_read_unlock(n); tipc_lxc_xmit(); rcu_read_unlock(); instead of: tipc_node_read_lock(n); rcu_read_lock(); tipc_node_read_unlock(n); tipc_lxc_xmit(); rcu_read_unlock(); and we have to call tipc_node_read_lock/unlock() twice in tipc_node_xmit(). Fixes: f73b12812a3d ("tipc: improve throughput between nodes in netns") Reported-by: Shuang Li Signed-off-by: Xin Long Link: https://lore.kernel.org/r/5bdd1f8fee9db695cfff4528a48c9b9d0523fb00.1670110641.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni --- net/tipc/node.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index b48d97cbbe29..49ddc484c4fe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_node *n; struct sk_buff_head xmitq; bool node_up = false; + struct net *peer_net; int bearer_id; int rc; @@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, return -EHOSTUNREACH; } + rcu_read_lock(); tipc_node_read_lock(n); node_up = node_is_up(n); - if (node_up && n->peer_net && check_net(n->peer_net)) { + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { /* xmit inner linux container */ - tipc_lxc_xmit(n->peer_net, list); + tipc_lxc_xmit(peer_net, list); if (likely(skb_queue_empty(list))) { - tipc_node_read_unlock(n); + rcu_read_unlock(); tipc_node_put(n); return 0; } } + rcu_read_unlock(); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); From 063a932b64db3317ec020c94466fe52923a15f60 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Sun, 4 Dec 2022 14:09:08 +0800 Subject: [PATCH 921/963] ethernet: aeroflex: fix potential skb leak in greth_init_rings() The greth_init_rings() function won't free the newly allocated skb when dma_mapping_error() returns error, so add dev_kfree_skb() to fix it. Compile tested only. Fixes: d4c41139df6e ("net: Add Aeroflex Gaisler 10/100/1G Ethernet MAC driver") Signed-off-by: Zhang Changzhong Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/1670134149-29516-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/aeroflex/greth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index e104fb02817d..aa0d2f3aaeaa 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth) if (dma_mapping_error(greth->dev, dma_addr)) { if (netif_msg_ifup(greth)) dev_err(greth->dev, "Could not create initial DMA mapping\n"); + dev_kfree_skb(skb); goto cleanup; } greth->rx_skbuff[i] = skb; From 4fad22a1281c500f15b172c9d261eff347ca634b Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 5 Dec 2022 06:15:15 +0000 Subject: [PATCH 922/963] dpaa2-switch: Fix memory leak in dpaa2_switch_acl_entry_add() and dpaa2_switch_acl_entry_remove() The cmd_buff needs to be freed when error happened in dpaa2_switch_acl_entry_add() and dpaa2_switch_acl_entry_remove(). Fixes: 1110318d83e8 ("dpaa2-switch: add tc flower hardware offload on ingress traffic") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221205061515.115012-1-yuancan@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index cacd454ac696..c39b866e2582 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); + kfree(cmd_buff); return err; } @@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); + kfree(cmd_buff); return err; } From 998b30c3948e4d0b1097e639918c5cff332acac5 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Tue, 6 Dec 2022 01:38:32 -0800 Subject: [PATCH 923/963] io_uring: Fix a null-ptr-deref in io_tctx_exit_cb() Syzkaller reports a NULL deref bug as follows: BUG: KASAN: null-ptr-deref in io_tctx_exit_cb+0x53/0xd3 Read of size 4 at addr 0000000000000138 by task file1/1955 CPU: 1 PID: 1955 Comm: file1 Not tainted 6.1.0-rc7-00103-gef4d3ea40565 #75 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Call Trace: dump_stack_lvl+0xcd/0x134 ? io_tctx_exit_cb+0x53/0xd3 kasan_report+0xbb/0x1f0 ? io_tctx_exit_cb+0x53/0xd3 kasan_check_range+0x140/0x190 io_tctx_exit_cb+0x53/0xd3 task_work_run+0x164/0x250 ? task_work_cancel+0x30/0x30 get_signal+0x1c3/0x2440 ? lock_downgrade+0x6e0/0x6e0 ? lock_downgrade+0x6e0/0x6e0 ? exit_signals+0x8b0/0x8b0 ? do_raw_read_unlock+0x3b/0x70 ? do_raw_spin_unlock+0x50/0x230 arch_do_signal_or_restart+0x82/0x2470 ? kmem_cache_free+0x260/0x4b0 ? putname+0xfe/0x140 ? get_sigframe_size+0x10/0x10 ? do_execveat_common.isra.0+0x226/0x710 ? lockdep_hardirqs_on+0x79/0x100 ? putname+0xfe/0x140 ? do_execveat_common.isra.0+0x238/0x710 exit_to_user_mode_prepare+0x15f/0x250 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x42/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0023:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 002b:00000000fffb7790 EFLAGS: 00000200 ORIG_RAX: 000000000000000b RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Kernel panic - not syncing: panic_on_warn set ... This happens because the adding of task_work from io_ring_exit_work() isn't synchronized with canceling all work items from eg exec. The execution of the two are ordered in that they are both run by the task itself, but if io_tctx_exit_cb() is queued while we're canceling all work items off exec AND gets executed when the task exits to userspace rather than in the main loop in io_uring_cancel_generic(), then we can find current->io_uring == NULL and hit the above crash. It's safe to add this NULL check here, because the execution of the two paths are done by the task itself. Cc: stable@vger.kernel.org Fixes: d56d938b4bef ("io_uring: do ctx initiated file note removal") Reported-by: syzkaller Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20221206093833.3812138-1-harshit.m.mogalapalli@oracle.com [axboe: add code comment and also put an explanation in the commit msg] Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 8840cf3e20f2..61cd7ffd0f6a 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2707,8 +2707,10 @@ static __cold void io_tctx_exit_cb(struct callback_head *cb) /* * When @in_idle, we're in cancellation and it's racy to remove the * node. It'll be removed by the end of cancellation, just ignore it. + * tctx can be NULL if the queueing of this task_work raced with + * work cancelation off the exec path. */ - if (!atomic_read(&tctx->in_idle)) + if (tctx && !atomic_read(&tctx->in_idle)) io_uring_del_tctx_node((unsigned long)work->ctx); complete(&work->completion); } From 73a0b6ee5d6269f92df43e1d09b3278a2886bf8a Mon Sep 17 00:00:00 2001 From: Wang Kefeng Date: Sun, 4 Dec 2022 04:46:20 +0100 Subject: [PATCH 924/963] ARM: 9278/1: kfence: only handle translation faults This is a similar fixup like arm64 does, only handle translation faults in case of unexpected kfence report when alignment faults on ARM, see more from commit 0bb1fbffc631 ("arm64: mm: kfence: only handle translation faults"). Fixes: 75969686ec0d ("ARM: 9166/1: Support KFENCE for ARM") Signed-off-by: Kefeng Wang Signed-off-by: Russell King (Oracle) --- arch/arm/mm/fault.c | 18 ++++++++++++++++-- arch/arm/mm/fault.h | 9 ++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 46cccd6bf705..de988cba9a4b 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -105,6 +105,19 @@ static inline bool is_write_fault(unsigned int fsr) return (fsr & FSR_WRITE) && !(fsr & FSR_CM); } +static inline bool is_translation_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) + return true; +#else + if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) + return true; +#endif + return false; +} + static void die_kernel_fault(const char *msg, struct mm_struct *mm, unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -140,7 +153,8 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; } else { - if (kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + if (is_translation_fault(fsr) && + kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) return; msg = "paging request"; @@ -208,7 +222,7 @@ static inline bool is_permission_fault(unsigned int fsr) { int fs = fsr_fs(fsr); #ifdef CONFIG_ARM_LPAE - if ((fs & FS_PERM_NOLL_MASK) == FS_PERM_NOLL) + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) return true; #else if (fs == FS_L1_PERM || fs == FS_L2_PERM) diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h index 83b5ab32d7a4..54927ba1fa6e 100644 --- a/arch/arm/mm/fault.h +++ b/arch/arm/mm/fault.h @@ -14,8 +14,9 @@ #ifdef CONFIG_ARM_LPAE #define FSR_FS_AEA 17 +#define FS_TRANS_NOLL 0x4 #define FS_PERM_NOLL 0xC -#define FS_PERM_NOLL_MASK 0x3C +#define FS_MMU_NOLL_MASK 0x3C static inline int fsr_fs(unsigned int fsr) { @@ -23,8 +24,10 @@ static inline int fsr_fs(unsigned int fsr) } #else #define FSR_FS_AEA 22 -#define FS_L1_PERM 0xD -#define FS_L2_PERM 0xF +#define FS_L1_TRANS 0x5 +#define FS_L2_TRANS 0x7 +#define FS_L1_PERM 0xD +#define FS_L2_PERM 0xF static inline int fsr_fs(unsigned int fsr) { From e4678483f9bc400642bbc05c6b75a1b44bcb6c25 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 16 Nov 2022 09:43:41 -0600 Subject: [PATCH 925/963] platform/x86/amd: pmc: Add a workaround for an s0i3 issue on Cezanne Cezanne platforms under the right circumstances have a synchronization problem where attempting to enter s2idle may fail if the x86 cores are put into HLT before hardware resume from the previous attempt has completed. To avoid this issue add a 10-20ms delay before entering s2idle another time. This workaround will only be applied on interrupts that wake the hardware but don't break the s2idle loop. Cc: stable@vger.kernel.org # 6.1 Cc: "Mahapatra, Rajib" Cc: "Raul Rangel" Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20221116154341.13382-1-mario.limonciello@amd.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/amd/pmc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index ef4ae977b8e0..439d282aafd1 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -739,8 +739,14 @@ static void amd_pmc_s2idle_prepare(void) static void amd_pmc_s2idle_check(void) { struct amd_pmc_dev *pdev = &pmc; + struct smu_metrics table; int rc; + /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ + if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && + table.s0i3_last_entry_status) + usleep_range(10000, 20000); + /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); From 9e624651859214fb2c4e442b059eba0aefcd0801 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 5 Dec 2022 22:13:33 +0800 Subject: [PATCH 926/963] xen/netback: don't call kfree_skb() under spin_lock_irqsave() It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So replace kfree_skb() with dev_kfree_skb_irq() under spin_lock_irqsave(). Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") Signed-off-by: Yang Yingliang Reviewed-by: Paul Durrant Link: https://lore.kernel.org/r/20221205141333.3974565-1-yangyingliang@huawei.com Signed-off-by: Paolo Abeni --- drivers/net/xen-netback/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 932762177110..d022206a0d17 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -92,7 +92,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); + dev_kfree_skb_irq(skb); queue->vif->dev->stats.rx_dropped++; } else { if (skb_queue_empty(&queue->rx_queue)) From 7dfa764e0223a324366a2a1fc056d4d9d4e95491 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 7 Dec 2022 08:19:38 +0100 Subject: [PATCH 927/963] xen/netback: fix build warning Commit ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") introduced a (valid) build warning. There have even been reports of this problem breaking networking of Xen guests. Fixes: ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Ross Lagerwall Tested-by: Jason Andryuk Signed-off-by: Juergen Gross --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 054ac0e897f6..bf627af723bf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -530,7 +530,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, const bool sharedslot = nr_frags && frag_get_pending_idx(&shinfo->frags[0]) == copy_pending_idx(skb, copy_count(skb) - 1); - int i, err; + int i, err = 0; for (i = 0; i < copy_count(skb); i++) { int newerr; From 87a39882b5ab3127700ac4b9277608075f98eda2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 5 Dec 2022 21:48:45 +0200 Subject: [PATCH 928/963] net: dsa: mv88e6xxx: accept phy-mode = "internal" for internal PHY ports The ethernet-controller dt-schema, mostly pushed forward by Linux, has the "internal" PHY mode for denoting MAC connections to an internal PHY. U-Boot may provide device tree blobs where this phy-mode is specified, so make the Linux driver accept them. It appears that the current behavior with phy-mode = "internal" was introduced when mv88e6xxx started reporting supported_interfaces to phylink. Prior to that, I don't think it would have any issues accepting this phy-mode. Fixes: d4ebf12bcec4 ("net: dsa: mv88e6xxx: populate supported_interfaces and mac_capabilities") Link: https://lore.kernel.org/linux-arm-kernel/20221205172709.kglithpbhdbsakvd@skbuf/T/ Reported-by: Tim Harvey Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Tim Harvey # imx6q-gw904.dts Link: https://lore.kernel.org/r/20221205194845.2131161-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/dsa/mv88e6xxx/chip.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2479be3a1e35..937cb22cb3d4 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -833,10 +833,13 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port, chip->info->ops->phylink_get_caps(chip, port, config); - /* Internal ports need GMII for PHYLIB */ - if (mv88e6xxx_phy_is_internal(ds, port)) + if (mv88e6xxx_phy_is_internal(ds, port)) { + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + /* Internal ports with no phy-mode need GMII for PHYLIB */ __set_bit(PHY_INTERFACE_MODE_GMII, config->supported_interfaces); + } } static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, From bc21fe9a5844c5bc8f7ec319b11d2671a94eb867 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 1 Dec 2022 11:17:31 +0800 Subject: [PATCH 929/963] drm/amdgpu/sdma_v4_0: turn off SDMA ring buffer in the s2idle suspend In the SDMA s0ix save process requires to turn off SDMA ring buffer for avoiding the SDMA in-flight request, otherwise will suffer from SDMA page fault which causes by page request from in-flight SDMA ring accessing at SDMA restore phase. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2248 Cc: stable@vger.kernel.org # 6.0,5.15+ Fixes: f8f4e2a51834 ("drm/amdgpu: skipping SDMA hw_init and hw_fini for S0ix.") Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Tested-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 1122bd4eae98..4d780e4430e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se /** - * sdma_v4_0_gfx_stop - stop the gfx async dma engines + * sdma_v4_0_gfx_enable - enable the gfx async dma engines * * @adev: amdgpu_device pointer - * - * Stop the gfx async dma ring buffers (VEGA10). + * @enable: enable SDMA RB/IB + * control the gfx async dma ring buffers (VEGA10). */ -static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) { u32 rb_cntl, ib_cntl; int i; @@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } } @@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) int i; if (!enable) { - sdma_v4_0_gfx_stop(adev); + sdma_v4_0_gfx_enable(adev, enable); sdma_v4_0_rlc_stop(adev); if (adev->sdma.has_page_queue) sdma_v4_0_page_stop(adev); @@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU saves SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_gfx_enable(adev, false); return 0; + } return sdma_v4_0_hw_fini(adev); } @@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SMU restores SDMA state for us */ - if (adev->in_s0ix) + if (adev->in_s0ix) { + sdma_v4_0_enable(adev, true); + sdma_v4_0_gfx_enable(adev, true); + amdgpu_ttm_set_buffer_funcs_status(adev, true); return 0; + } return sdma_v4_0_hw_init(adev); } From aeffc8fb2174f017a10df114bc312f899904dc68 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 25 Nov 2022 19:13:41 -0500 Subject: [PATCH 930/963] drm/amd/display: fix array index out of bound error in DCN32 DML [Why&How] LinkCapacitySupport array is indexed with the number of voltage states and not the number of max DPPs. Fix the error by changing the array declaration to use the correct (larger) array size of total number of voltage states. Signed-off-by: Aurabindo Pillai Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x --- drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 630f3395e90a..a0207a8f8756 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -1153,7 +1153,7 @@ struct vba_vars_st { double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX]; double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX]; bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX]; - bool LinkCapacitySupport[DC__NUM_DPP__MAX]; + bool LinkCapacitySupport[DC__VOLTAGE_STATES]; bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX]; unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX]; unsigned int VFrontPorch[DC__NUM_DPP__MAX]; From 098e5edc5d048a8df8691fd9fde895af100be42b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 7 Dec 2022 13:04:34 +0000 Subject: [PATCH 931/963] media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area() While vb2_mmap took the mmap_lock mutex, vb2_get_unmapped_area didn't. Add this. Also take this opportunity to move the 'q->memory != VB2_MEMORY_MMAP' check and vb2_fileio_is_active() check into __find_plane_by_offset() so both vb2_mmap and vb2_get_unmapped_area do the same checks. Since q->memory is checked while mmap_lock is held, also take that lock in reqbufs and create_bufs when it is set, and set it back to MEMORY_UNKNOWN on error. Fixes: f035eb4e976e ("[media] videobuf2: fix lockdep warning") Signed-off-by: Hans Verkuil Acked-by: Tomasz Figa Reviewed-by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- .../media/common/videobuf2/videobuf2-core.c | 102 +++++++++++++----- 1 file changed, 73 insertions(+), 29 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index ab9697f3b5f1..92efc4676df6 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -813,7 +813,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory value + * in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); set_queue_coherency(q, non_coherent_mem); /* @@ -823,22 +829,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Check that driver has set sane values */ - if (WARN_ON(!num_planes)) - return -EINVAL; + if (WARN_ON(!num_planes)) { + ret = -EINVAL; + goto error; + } for (i = 0; i < num_planes; i++) - if (WARN_ON(!plane_sizes[i])) - return -EINVAL; + if (WARN_ON(!plane_sizes[i])) { + ret = -EINVAL; + goto error; + } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -879,7 +890,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -895,6 +907,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, q->waiting_for_buffers = !q->is_output; return 0; + +error: + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); @@ -906,6 +924,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; + bool no_previous_buffers = !q->num_buffers; int ret; if (q->num_buffers == VB2_MAX_FRAME) { @@ -913,13 +932,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, return -ENOBUFS; } - if (!q->num_buffers) { + if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory + * value in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); q->waiting_for_buffers = !q->is_output; set_queue_coherency(q, non_coherent_mem); } else { @@ -945,14 +970,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -983,7 +1009,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -998,6 +1025,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, *count = allocated_buffers; return 0; + +error: + if (no_previous_buffers) { + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + } + return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); @@ -2164,6 +2199,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, struct vb2_buffer *vb; unsigned int buffer, plane; + /* + * Sanity checks to ensure the lock is held, MEMORY_MMAP is + * used and fileio isn't active. + */ + lockdep_assert_held(&q->mmap_lock); + + if (q->memory != VB2_MEMORY_MMAP) { + dprintk(q, 1, "queue is not currently set up for mmap\n"); + return -EINVAL; + } + + if (vb2_fileio_is_active(q)) { + dprintk(q, 1, "file io in progress\n"); + return -EBUSY; + } + /* * Go over all buffers and their planes, comparing the given offset * with an offset assigned to each plane. If a match is found, @@ -2265,11 +2316,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) int ret; unsigned long length; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } - /* * Check memory area access mode. */ @@ -2291,14 +2337,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) mutex_lock(&q->mmap_lock); - if (vb2_fileio_is_active(q)) { - dprintk(q, 1, "mmap: file io in progress\n"); - ret = -EBUSY; - goto unlock; - } - /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) @@ -2351,22 +2392,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, void *vaddr; int ret; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } + mutex_lock(&q->mmap_lock); /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; vaddr = vb2_plane_vaddr(vb, plane); + mutex_unlock(&q->mmap_lock); return vaddr ? (unsigned long)vaddr : -EINVAL; + +unlock: + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif From b5b52de3214a29911f949459a79f6640969b5487 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 7 Dec 2022 13:49:15 +0000 Subject: [PATCH 932/963] fscache: Fix oops due to race with cookie_lru and use_cookie If a cookie expires from the LRU and the LRU_DISCARD flag is set, but the state machine has not run yet, it's possible another thread can call fscache_use_cookie and begin to use it. When the cookie_worker finally runs, it will see the LRU_DISCARD flag set, transition the cookie->state to LRU_DISCARDING, which will then withdraw the cookie. Once the cookie is withdrawn the object is removed the below oops will occur because the object associated with the cookie is now NULL. Fix the oops by clearing the LRU_DISCARD bit if another thread uses the cookie before the cookie_worker runs. BUG: kernel NULL pointer dereference, address: 0000000000000008 ... CPU: 31 PID: 44773 Comm: kworker/u130:1 Tainted: G E 6.0.0-5.dneg.x86_64 #1 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Workqueue: events_unbound netfs_rreq_write_to_cache_work [netfs] RIP: 0010:cachefiles_prepare_write+0x28/0x90 [cachefiles] ... Call Trace: netfs_rreq_write_to_cache_work+0x11c/0x320 [netfs] process_one_work+0x217/0x3e0 worker_thread+0x4a/0x3b0 kthread+0xd6/0x100 Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning") Reported-by: Daire Byrne Signed-off-by: Dave Wysochanski Signed-off-by: David Howells Tested-by: Daire Byrne Link: https://lore.kernel.org/r/20221117115023.1350181-1-dwysocha@redhat.com/ # v1 Link: https://lore.kernel.org/r/20221117142915.1366990-1-dwysocha@redhat.com/ # v2 Signed-off-by: Linus Torvalds --- fs/fscache/cookie.c | 8 ++++++++ include/trace/events/fscache.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 451d8a077e12..bce2492186d0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -605,6 +605,14 @@ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); queue = true; } + /* + * We could race with cookie_lru which may set LRU_DISCARD bit + * but has yet to run the cookie state machine. If this happens + * and another thread tries to use the cookie, clear LRU_DISCARD + * so we don't end up withdrawing the cookie while in use. + */ + if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) + fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear); break; case FSCACHE_COOKIE_STATE_FAILED: diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index c078c48a8e6d..a6190aa1b406 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -66,6 +66,7 @@ enum fscache_cookie_trace { fscache_cookie_put_work, fscache_cookie_see_active, fscache_cookie_see_lru_discard, + fscache_cookie_see_lru_discard_clear, fscache_cookie_see_lru_do_one, fscache_cookie_see_relinquish, fscache_cookie_see_withdraw, @@ -149,6 +150,7 @@ enum fscache_access_trace { EM(fscache_cookie_put_work, "PQ work ") \ EM(fscache_cookie_see_active, "- activ") \ EM(fscache_cookie_see_lru_discard, "- x-lru") \ + EM(fscache_cookie_see_lru_discard_clear,"- lrudc") \ EM(fscache_cookie_see_lru_do_one, "- lrudo") \ EM(fscache_cookie_see_relinquish, "- x-rlq") \ EM(fscache_cookie_see_withdraw, "- x-wth") \ From 5f4d487d01ff5349da38f7a09ca36bf6aa2e29fb Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 5 Dec 2022 21:04:53 +0100 Subject: [PATCH 933/963] net: phy: mxl-gpy: add MDINT workaround At least the GPY215B and GPY215C has a bug where it is still driving the interrupt line (MDINT) even after the interrupt status register is read and its bits are cleared. This will cause an interrupt storm. Although the MDINT is multiplexed with a GPIO pin and theoretically we could switch the pinmux to GPIO input mode, this isn't possible because the access to this register will stall exactly as long as the interrupt line is asserted. We exploit this very fact and just read a random internal register in our interrupt handler. This way, it will be delayed until the external interrupt line is released and an interrupt storm is avoided. The internal register access via the mailbox was deduced by looking at the downstream PHY API because the datasheet doesn't mention any of this. Fixes: 7d901a1e878a ("net: phy: add Maxlinear GPY115/21x/24x driver") Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20221205200453.3447866-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/phy/mxl-gpy.c | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 24bae27eedef..cae24091fb6f 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,14 @@ #define VPSPEC1_TEMP_STA 0x0E #define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0) +/* Mailbox */ +#define VSPEC1_MBOX_DATA 0x5 +#define VSPEC1_MBOX_ADDRLO 0x6 +#define VSPEC1_MBOX_CMD 0x7 +#define VSPEC1_MBOX_CMD_ADDRHI GENMASK(7, 0) +#define VSPEC1_MBOX_CMD_RD (0 << 8) +#define VSPEC1_MBOX_CMD_READY BIT(15) + /* WoL */ #define VPSPEC2_WOL_CTL 0x0E06 #define VPSPEC2_WOL_AD01 0x0E08 @@ -77,7 +86,13 @@ #define VPSPEC2_WOL_AD45 0x0E0A #define WOL_EN BIT(0) +/* Internal registers, access via mbox */ +#define REG_GPIO0_OUT 0xd3ce00 + struct gpy_priv { + /* serialize mailbox acesses */ + struct mutex mbox_lock; + u8 fw_major; u8 fw_minor; }; @@ -187,6 +202,45 @@ static int gpy_hwmon_register(struct phy_device *phydev) } #endif +static int gpy_mbox_read(struct phy_device *phydev, u32 addr) +{ + struct gpy_priv *priv = phydev->priv; + int val, ret; + u16 cmd; + + mutex_lock(&priv->mbox_lock); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_ADDRLO, + addr); + if (ret) + goto out; + + cmd = VSPEC1_MBOX_CMD_RD; + cmd |= FIELD_PREP(VSPEC1_MBOX_CMD_ADDRHI, addr >> 16); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_CMD, cmd); + if (ret) + goto out; + + /* The mbox read is used in the interrupt workaround. It was observed + * that a read might take up to 2.5ms. This is also the time for which + * the interrupt line is stuck low. To be on the safe side, poll the + * ready bit for 10ms. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VSPEC1_MBOX_CMD, val, + (val & VSPEC1_MBOX_CMD_READY), + 500, 10000, false); + if (ret) + goto out; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_DATA); + +out: + mutex_unlock(&priv->mbox_lock); + return ret; +} + static int gpy_config_init(struct phy_device *phydev) { int ret; @@ -201,6 +255,13 @@ static int gpy_config_init(struct phy_device *phydev) return ret < 0 ? ret : 0; } +static bool gpy_has_broken_mdint(struct phy_device *phydev) +{ + /* At least these PHYs are known to have broken interrupt handling */ + return phydev->drv->phy_id == PHY_ID_GPY215B || + phydev->drv->phy_id == PHY_ID_GPY215C; +} + static int gpy_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -218,6 +279,7 @@ static int gpy_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; phydev->priv = priv; + mutex_init(&priv->mbox_lock); fw_version = phy_read(phydev, PHY_FWV); if (fw_version < 0) @@ -492,6 +554,29 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) if (!(reg & PHY_IMASK_MASK)) return IRQ_NONE; + /* The PHY might leave the interrupt line asserted even after PHY_ISTAT + * is read. To avoid interrupt storms, delay the interrupt handling as + * long as the PHY drives the interrupt line. An internal bus read will + * stall as long as the interrupt line is asserted, thus just read a + * random register here. + * Because we cannot access the internal bus at all while the interrupt + * is driven by the PHY, there is no way to make the interrupt line + * unstuck (e.g. by changing the pinmux to GPIO input) during that time + * frame. Therefore, polling is the best we can do and won't do any more + * harm. + * It was observed that this bug happens on link state and link speed + * changes on a GPY215B and GYP215C independent of the firmware version + * (which doesn't mean that this list is exhaustive). + */ + if (gpy_has_broken_mdint(phydev) && + (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) { + reg = gpy_mbox_read(phydev, REG_GPIO0_OUT); + if (reg < 0) { + phy_error(phydev); + return IRQ_NONE; + } + } + phy_trigger_machine(phydev); return IRQ_HANDLED; From 7d8c19bfc8ff3f78e5337107ca9246327fcb6b45 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Wed, 7 Dec 2022 09:53:10 +0800 Subject: [PATCH 934/963] net: plip: don't call kfree_skb/dev_kfree_skb() under spin_lock_irq() It is not allowed to call kfree_skb() or consume_skb() from hardware interrupt context or with interrupts being disabled. So replace kfree_skb/dev_kfree_skb() with dev_kfree_skb_irq() and dev_consume_skb_irq() under spin_lock_irq(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20221207015310.2984909-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/plip/plip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index c8791e9b451d..40ce8abe6999 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -450,12 +450,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, } rcv->state = PLIP_PK_DONE; if (rcv->skb) { - kfree_skb(rcv->skb); + dev_kfree_skb_irq(rcv->skb); rcv->skb = NULL; } snd->state = PLIP_PK_DONE; if (snd->skb) { - dev_kfree_skb(snd->skb); + dev_consume_skb_irq(snd->skb); snd->skb = NULL; } spin_unlock_irq(&nl->lock); From 803e84867de59a1e5d126666d25eb4860cfd2ebe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Dec 2022 10:13:51 +0000 Subject: [PATCH 935/963] ipv6: avoid use-after-free in ip6_fragment() Blamed commit claimed rcu_read_lock() was held by ip6_fragment() callers. It seems to not be always true, at least for UDP stack. syzbot reported: BUG: KASAN: use-after-free in ip6_dst_idev include/net/ip6_fib.h:245 [inline] BUG: KASAN: use-after-free in ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 Read of size 8 at addr ffff88801d403e80 by task syz-executor.3/7618 CPU: 1 PID: 7618 Comm: syz-executor.3 Not tainted 6.1.0-rc6-syzkaller-00012-g4312098baf37 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:284 [inline] print_report+0x15e/0x45d mm/kasan/report.c:395 kasan_report+0xbf/0x1f0 mm/kasan/report.c:495 ip6_dst_idev include/net/ip6_fib.h:245 [inline] ip6_fragment+0x2724/0x2770 net/ipv6/ip6_output.c:951 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fde3588c0d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fde365b6168 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007fde359ac050 RCX: 00007fde3588c0d9 RDX: 000000000000ffdc RSI: 00000000200000c0 RDI: 000000000000000a RBP: 00007fde358e7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fde35acfb1f R14: 00007fde365b6300 R15: 0000000000022000 Allocated by task 7618: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x82/0x90 mm/kasan/common.c:325 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x2b4/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 ip6_rt_pcpu_alloc net/ipv6/route.c:1369 [inline] rt6_make_pcpu_route net/ipv6/route.c:1417 [inline] ip6_pol_route+0x901/0x1190 net/ipv6/route.c:2254 pol_lookup_func include/net/ip6_fib.h:582 [inline] fib6_rule_lookup+0x52e/0x6f0 net/ipv6/fib6_rules.c:121 ip6_route_output_flags_noref+0x2e6/0x380 net/ipv6/route.c:2625 ip6_route_output_flags+0x76/0x320 net/ipv6/route.c:2638 ip6_route_output include/net/ip6_route.h:98 [inline] ip6_dst_lookup_tail+0x5ab/0x1620 net/ipv6/ip6_output.c:1092 ip6_dst_lookup_flow+0x90/0x1d0 net/ipv6/ip6_output.c:1222 ip6_sk_dst_lookup_flow+0x553/0x980 net/ipv6/ip6_output.c:1260 udpv6_sendmsg+0x151d/0x2c80 net/ipv6/udp.c:1554 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 7599: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x40 mm/kasan/generic.c:511 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x160/0x1c0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1724 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1750 slab_free mm/slub.c:3661 [inline] kmem_cache_free+0xee/0x5c0 mm/slub.c:3683 dst_destroy+0x2ea/0x400 net/core/dst.c:127 rcu_do_batch kernel/rcu/tree.c:2250 [inline] rcu_core+0x81f/0x1980 kernel/rcu/tree.c:2510 __do_softirq+0x1fb/0xadc kernel/softirq.c:571 Last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] skb_release_head_state+0x250/0x2a0 net/core/skbuff.c:838 skb_release_all net/core/skbuff.c:852 [inline] __kfree_skb net/core/skbuff.c:868 [inline] kfree_skb_reason+0x151/0x4b0 net/core/skbuff.c:891 kfree_skb_list_reason+0x4b/0x70 net/core/skbuff.c:901 kfree_skb_list include/linux/skbuff.h:1227 [inline] ip6_fragment+0x2026/0x2770 net/ipv6/ip6_output.c:949 __ip6_finish_output net/ipv6/ip6_output.c:193 [inline] ip6_finish_output+0x9a3/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] ip6_local_out+0xb3/0x1a0 net/ipv6/output_core.c:161 ip6_send_skb+0xbb/0x340 net/ipv6/ip6_output.c:1966 udp_v6_send_skb+0x82a/0x18a0 net/ipv6/udp.c:1286 udp_v6_push_pending_frames+0x140/0x200 net/ipv6/udp.c:1313 udpv6_sendmsg+0x18da/0x2c80 net/ipv6/udp.c:1606 inet6_sendmsg+0x9d/0xe0 net/ipv6/af_inet6.c:665 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 sock_write_iter+0x295/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2191 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9ed/0xdd0 fs/read_write.c:584 ksys_write+0x1ec/0x250 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 __kasan_record_aux_stack+0xbc/0xd0 mm/kasan/generic.c:481 call_rcu+0x9d/0x820 kernel/rcu/tree.c:2798 dst_release net/core/dst.c:177 [inline] dst_release+0x7d/0xe0 net/core/dst.c:167 refdst_drop include/net/dst.h:256 [inline] skb_dst_drop include/net/dst.h:268 [inline] __dev_queue_xmit+0x1b9d/0x3ba0 net/core/dev.c:4211 dev_queue_xmit include/linux/netdevice.h:3008 [inline] neigh_resolve_output net/core/neighbour.c:1552 [inline] neigh_resolve_output+0x51b/0x840 net/core/neighbour.c:1532 neigh_output include/net/neighbour.h:546 [inline] ip6_finish_output2+0x56c/0x1530 net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x694/0x1170 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0x1f1/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:445 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] NF_HOOK include/linux/netfilter.h:296 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the object at ffff88801d403dc0 which belongs to the cache ip6_dst_cache of size 240 The buggy address is located 192 bytes inside of 240-byte region [ffff88801d403dc0, ffff88801d403eb0) The buggy address belongs to the physical page: page:ffffea00007500c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d403 memcg:ffff888022f49c81 flags: 0xfff00000000200(slab|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000200 ffffea0001ef6580 dead000000000002 ffff88814addf640 raw: 0000000000000000 00000000800c000c 00000001ffffffff ffff888022f49c81 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x112a20(GFP_ATOMIC|__GFP_NOWARN|__GFP_NORETRY|__GFP_HARDWALL), pid 3719, tgid 3719 (kworker/0:6), ts 136223432244, free_ts 136222971441 prep_new_page mm/page_alloc.c:2539 [inline] get_page_from_freelist+0x10b5/0x2d50 mm/page_alloc.c:4288 __alloc_pages+0x1cb/0x5b0 mm/page_alloc.c:5555 alloc_pages+0x1aa/0x270 mm/mempolicy.c:2285 alloc_slab_page mm/slub.c:1794 [inline] allocate_slab+0x213/0x300 mm/slub.c:1939 new_slab mm/slub.c:1992 [inline] ___slab_alloc+0xa91/0x1400 mm/slub.c:3180 __slab_alloc.constprop.0+0x56/0xa0 mm/slub.c:3279 slab_alloc_node mm/slub.c:3364 [inline] slab_alloc mm/slub.c:3406 [inline] __kmem_cache_alloc_lru mm/slub.c:3413 [inline] kmem_cache_alloc+0x31a/0x3d0 mm/slub.c:3422 dst_alloc+0x14a/0x1f0 net/core/dst.c:92 ip6_dst_alloc+0x32/0xa0 net/ipv6/route.c:344 icmp6_dst_alloc+0x71/0x680 net/ipv6/route.c:3261 mld_sendpack+0x5de/0xe70 net/ipv6/mcast.c:1809 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x720/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289 worker_thread+0x669/0x1090 kernel/workqueue.c:2436 kthread+0x2e8/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1459 [inline] free_pcp_prepare+0x65c/0xd90 mm/page_alloc.c:1509 free_unref_page_prepare mm/page_alloc.c:3387 [inline] free_unref_page+0x1d/0x4d0 mm/page_alloc.c:3483 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2586 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x184/0x210 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0x66/0x90 mm/kasan/common.c:302 kasan_slab_alloc include/linux/kasan.h:201 [inline] slab_post_alloc_hook mm/slab.h:737 [inline] slab_alloc_node mm/slub.c:3398 [inline] kmem_cache_alloc_node+0x304/0x410 mm/slub.c:3443 __alloc_skb+0x214/0x300 net/core/skbuff.c:497 alloc_skb include/linux/skbuff.h:1267 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1191 [inline] netlink_sendmsg+0x9a6/0xe10 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 __sys_sendto+0x23a/0x340 net/socket.c:2117 __do_sys_sendto net/socket.c:2129 [inline] __se_sys_sendto net/socket.c:2125 [inline] __x64_sys_sendto+0xe1/0x1b0 net/socket.c:2125 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 1758fd4688eb ("ipv6: remove unnecessary dst_hold() in ip6_fragment()") Reported-by: syzbot+8c0ac31aa9681abb9e2d@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Martin KaFai Lau Link: https://lore.kernel.org/r/20221206101351.2037285-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e19507614f64..60fd91bb5171 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err < 0) goto fail; + /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ @@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; } @@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err; slow_path_clean: From 143d64bdbdb85787953a70332f9e5f658b678550 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 8 Dec 2022 14:59:15 +0800 Subject: [PATCH 936/963] LoongArch: Export symbol for function smp_send_reschedule() Function smp_send_reschedule() is standard kernel API, which is defined in header file include/linux/smp.h. However, on LoongArch it is defined as an inline function, this is confusing and kernel modules can not use this function. Now we define smp_send_reschedule() as a general function, and add a EXPORT_SYMBOL_GPL on this function, so that kernel modules can use it. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/smp.h | 10 ---------- arch/loongarch/kernel/smp.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 3dd172d9ffea..d82687390b4a 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -78,16 +78,6 @@ extern void calculate_cpu_foreign_map(void); */ extern void show_ipi_list(struct seq_file *p, int prec); -/* - * This function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static inline void smp_send_reschedule(int cpu) -{ - loongson_send_ipi_single(cpu, SMP_RESCHEDULE); -} - static inline void arch_send_call_function_single_ipi(int cpu) { loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 6ed72f7ff278..14508d429ffa 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -149,6 +149,17 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } +/* + * This function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void smp_send_reschedule(int cpu) +{ + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; From b681604edab66f20dde767f4690e554f26c5bfb9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 8 Dec 2022 14:59:15 +0800 Subject: [PATCH 937/963] LoongArch: mm: Fix huge page entry update for virtual machine In virtual machine (guest mode), the tlbwr instruction can not write the last entry of MTLB, so we need to make it non-present by invtlb and then write it by tlbfill. This also simplify the whole logic. Signed-off-by: Rui Wang Signed-off-by: Huacai Chen --- arch/loongarch/mm/tlbex.S | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d8ee8fbc8c67..58781c6e4191 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -10,6 +10,8 @@ #include #include +#define INVTLB_ADDR_GFALSE_AND_ASID 5 + #define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) @@ -136,13 +138,10 @@ tlb_huge_update_load: ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr - - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 /* * A huge PTE describes an area the size of the @@ -287,13 +286,11 @@ tlb_huge_update_store: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -436,6 +433,11 @@ tlb_huge_update_modify: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 + /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -466,7 +468,7 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX - tlbwr + tlbfill /* Reset default page size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) From 38eb496d85b89d6ad8fe9701acd2ac1de804b6c1 Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 8 Dec 2022 14:59:15 +0800 Subject: [PATCH 938/963] docs/LoongArch: Add booting description 1, Describe the information passed from BootLoader to kernel. 2, Describe the meaning and values of the kernel image header field. Suggested-by: Xiaotian Wu Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen --- Documentation/loongarch/booting.rst | 42 +++++++++++++++++++++++++++++ Documentation/loongarch/index.rst | 1 + 2 files changed, 43 insertions(+) create mode 100644 Documentation/loongarch/booting.rst diff --git a/Documentation/loongarch/booting.rst b/Documentation/loongarch/booting.rst new file mode 100644 index 000000000000..91eccd410478 --- /dev/null +++ b/Documentation/loongarch/booting.rst @@ -0,0 +1,42 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +Booting Linux/LoongArch +======================= + +:Author: Yanteng Si +:Date: 18 Nov 2022 + +Information passed from BootLoader to kernel +============================================ + +LoongArch supports ACPI and FDT. The information that needs to be passed +to the kernel includes the memmap, the initrd, the command line, optionally +the ACPI/FDT tables, and so on. + +The kernel is passed the following arguments on `kernel_entry` : + + - a0 = efi_boot: `efi_boot` is a flag indicating whether + this boot environment is fully UEFI-compliant. + + - a1 = cmdline: `cmdline` is a pointer to the kernel command line. + + - a2 = systemtable: `systemtable` points to the EFI system table. + All pointers involved at this stage are in physical addresses. + +Header of Linux/LoongArch kernel images +======================================= + +Linux/LoongArch kernel images are EFI images. Being PE files, they have +a 64-byte header structured like:: + + u32 MZ_MAGIC /* "MZ", MS-DOS header */ + u32 res0 = 0 /* Reserved */ + u64 kernel_entry /* Kernel entry point */ + u64 _end - _text /* Kernel image effective size */ + u64 load_offset /* Kernel image load offset from start of RAM */ + u64 res1 = 0 /* Reserved */ + u64 res2 = 0 /* Reserved */ + u64 res3 = 0 /* Reserved */ + u32 LINUX_PE_MAGIC /* Magic number */ + u32 pe_header - _head /* Offset to the PE header */ diff --git a/Documentation/loongarch/index.rst b/Documentation/loongarch/index.rst index aaba648db907..c779bfa00c05 100644 --- a/Documentation/loongarch/index.rst +++ b/Documentation/loongarch/index.rst @@ -9,6 +9,7 @@ LoongArch Architecture :numbered: introduction + booting irq-chip-model features From 1385313d8bc112760559f06f64708d936b3f2d7c Mon Sep 17 00:00:00 2001 From: Yanteng Si Date: Thu, 8 Dec 2022 14:59:15 +0800 Subject: [PATCH 939/963] docs/zh_CN: Add LoongArch booting description's translation Translate ../loongarch/booting.rst into Chinese. Suggested-by: Xiaotian Wu Signed-off-by: Yanteng Si Signed-off-by: Huacai Chen --- .../translations/zh_CN/loongarch/booting.rst | 48 +++++++++++++++++++ .../translations/zh_CN/loongarch/index.rst | 1 + 2 files changed, 49 insertions(+) create mode 100644 Documentation/translations/zh_CN/loongarch/booting.rst diff --git a/Documentation/translations/zh_CN/loongarch/booting.rst b/Documentation/translations/zh_CN/loongarch/booting.rst new file mode 100644 index 000000000000..fb6440c438f0 --- /dev/null +++ b/Documentation/translations/zh_CN/loongarch/booting.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/loongarch/booting.rst + +:翻译: + + 司延腾 Yanteng Si + +==================== +启动 Linux/LoongArch +==================== + +:作者: 司延腾 +:日期: 2022年11月18日 + +BootLoader传递给内核的信息 +========================== + +LoongArch支持ACPI和FDT启动,需要传递给内核的信息包括memmap、initrd、cmdline、可 +选的ACPI/FDT表等。 + +内核在 `kernel_entry` 入口处被传递以下参数: + + - a0 = efi_boot: `efi_boot` 是一个标志,表示这个启动环境是否完全符合UEFI + 的要求。 + + - a1 = cmdline: `cmdline` 是一个指向内核命令行的指针。 + + - a2 = systemtable: `systemtable` 指向EFI的系统表,在这个阶段涉及的所有 + 指针都是物理地址。 + +Linux/LoongArch内核镜像文件头 +============================= + +内核镜像是EFI镜像。作为PE文件,它们有一个64字节的头部结构体,如下所示:: + + u32 MZ_MAGIC /* "MZ", MS-DOS 头 */ + u32 res0 = 0 /* 保留 */ + u64 kernel_entry /* 内核入口点 */ + u64 _end - _text /* 内核镜像有效大小 */ + u64 load_offset /* 加载内核镜像相对内存起始地址的偏移量 */ + u64 res1 = 0 /* 保留 */ + u64 res2 = 0 /* 保留 */ + u64 res3 = 0 /* 保留 */ + u32 LINUX_PE_MAGIC /* 魔术数 */ + u32 pe_header - _head /* 到PE头的偏移量 */ diff --git a/Documentation/translations/zh_CN/loongarch/index.rst b/Documentation/translations/zh_CN/loongarch/index.rst index 7d23eb78379d..0273a08342f7 100644 --- a/Documentation/translations/zh_CN/loongarch/index.rst +++ b/Documentation/translations/zh_CN/loongarch/index.rst @@ -14,6 +14,7 @@ LoongArch体系结构 :numbered: introduction + booting irq-chip-model features From ef19964da8a668c683f1d38274f6fb756e047945 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Mon, 5 Dec 2022 16:23:27 +0100 Subject: [PATCH 940/963] Revert "ARM: dts: imx7: Fix NAND controller size-cells" This reverts commit 753395ea1e45c724150070b5785900b6a44bd5fb. It introduced a boot regression on colibri-imx7, and potentially any other i.MX7 boards with MTD partition list generated into the fdt by U-Boot. While the commit we are reverting here is not obviously wrong, it fixes only a dt binding checker warning that is non-functional, while it introduces a boot regression and there is no obvious fix ready. Fixes: 753395ea1e45 ("ARM: dts: imx7: Fix NAND controller size-cells") Signed-off-by: Francesco Dolcini Reviewed-by: Miquel Raynal Acked-by: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/Y4dgBTGNWpM6SQXI@francesco-nb.int.toradex.com/ Link: https://lore.kernel.org/all/20221205144917.6514168a@xps-13/ Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/imx7s.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 03d2e8544a4e..0fc9e6b8b05d 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -1270,10 +1270,10 @@ dma_apbh: dma-apbh@33000000 { clocks = <&clks IMX7D_NAND_USDHC_BUS_RAWNAND_CLK>; }; - gpmi: nand-controller@33002000 { + gpmi: nand-controller@33002000{ compatible = "fsl,imx7d-gpmi-nand"; #address-cells = <1>; - #size-cells = <0>; + #size-cells = <1>; reg = <0x33002000 0x2000>, <0x33004000 0x4000>; reg-names = "gpmi-nand", "bch"; interrupts = ; From ed14e5903638f6eb868e3e2b4e610985e6a6c876 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 7 Dec 2022 09:50:01 +0800 Subject: [PATCH 941/963] net: thunderbolt: fix memory leak in tbnet_open() When tb_ring_alloc_rx() failed in tbnet_open(), ida that allocated in tb_xdomain_alloc_out_hopid() is not released. Add tb_xdomain_release_out_hopid() to the error path to release ida. Fixes: 180b0689425c ("thunderbolt: Allow multiple DMA tunnels over a single XDomain connection") Signed-off-by: Zhengchao Shao Acked-by: Mika Westerberg Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20221207015001.1755826-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/thunderbolt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index a52ee2bf5575..6312f67f260e 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -914,6 +914,7 @@ static int tbnet_open(struct net_device *dev) eof_mask, tbnet_start_poll, net); if (!ring) { netdev_err(dev, "failed to allocate Rx ring\n"); + tb_xdomain_release_out_hopid(xd, hopid); tb_ring_free(net->tx_ring.ring); net->tx_ring.ring = NULL; return -ENOMEM; From cdd97383e19d4afe29adc3376025a15ae3bab3a3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 7 Dec 2022 10:06:31 +0300 Subject: [PATCH 942/963] net: mvneta: Fix an out of bounds check In an earlier commit, I added a bounds check to prevent an out of bounds read and a WARN(). On further discussion and consideration that check was probably too aggressive. Instead of returning -EINVAL, a better fix would be to just prevent the out of bounds read but continue the process. Background: The value of "pp->rxq_def" is a number between 0-7 by default, or even higher depending on the value of "rxq_number", which is a module parameter. If the value is more than the number of available CPUs then it will trigger the WARN() in cpu_max_bits_warn(). Fixes: e8b4fc13900b ("net: mvneta: Prevent out of bounds read in mvneta_config_rss()") Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/Y5A7d1E5ccwHTYPf@kadam Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 66b7f27c9a48..5aefaaff0871 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4271,7 +4271,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) /* Use the cpu associated to the rxq when it is online, in all * the other cases, use the cpu 0 which can't be offline. */ - if (cpu_online(pp->rxq_def)) + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) elected_cpu = pp->rxq_def; max_cpu = num_present_cpus(); @@ -4927,9 +4927,6 @@ static int mvneta_config_rss(struct mvneta_port *pp) napi_disable(&pp->napi); } - if (pp->indir[0] >= nr_cpu_ids) - return -EINVAL; - pp->rxq_def = pp->indir[0]; /* Update unicast mapping */ From 38099024e51ee37dee5f0f577ca37175c932e3f7 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 7 Dec 2022 12:16:18 +0200 Subject: [PATCH 943/963] macsec: add missing attribute validation for offload Add missing attribute validation for IFLA_MACSEC_OFFLOAD to the netlink policy. Fixes: 791bb3fcafce ("net: macsec: add support for specifying offload upon link creation") Signed-off-by: Emeel Hakim Reviewed-by: Jiri Pirko Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/20221207101618.989-1-ehakim@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index f41f67b583db..2fbac51b9b19 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3698,6 +3698,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, + [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, }; static void macsec_free_netdev(struct net_device *dev) From ebaaadc332cd21e9df4dcf9ce12552d9354bbbe4 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Wed, 7 Dec 2022 11:53:04 +0100 Subject: [PATCH 944/963] s390/qeth: fix use-after-free in hsci KASAN found that addr was dereferenced after br2dev_event_work was freed. ================================================================== BUG: KASAN: use-after-free in qeth_l2_br2dev_worker+0x5ba/0x6b0 Read of size 1 at addr 00000000fdcea440 by task kworker/u760:4/540 CPU: 17 PID: 540 Comm: kworker/u760:4 Tainted: G E 6.1.0-20221128.rc7.git1.5aa3bed4ce83.300.fc36.s390x+kasan #1 Hardware name: IBM 8561 T01 703 (LPAR) Workqueue: 0.0.8000_event qeth_l2_br2dev_worker Call Trace: [<000000016944d4ce>] dump_stack_lvl+0xc6/0xf8 [<000000016942cd9c>] print_address_description.constprop.0+0x34/0x2a0 [<000000016942d118>] print_report+0x110/0x1f8 [<0000000167a7bd04>] kasan_report+0xfc/0x128 [<000000016938d79a>] qeth_l2_br2dev_worker+0x5ba/0x6b0 [<00000001673edd1e>] process_one_work+0x76e/0x1128 [<00000001673ee85c>] worker_thread+0x184/0x1098 [<000000016740718a>] kthread+0x26a/0x310 [<00000001672c606a>] __ret_from_fork+0x8a/0xe8 [<00000001694711da>] ret_from_fork+0xa/0x40 Allocated by task 108338: kasan_save_stack+0x40/0x68 kasan_set_track+0x36/0x48 __kasan_kmalloc+0xa0/0xc0 qeth_l2_switchdev_event+0x25a/0x738 atomic_notifier_call_chain+0x9c/0xf8 br_switchdev_fdb_notify+0xf4/0x110 fdb_notify+0x122/0x180 fdb_add_entry.constprop.0.isra.0+0x312/0x558 br_fdb_add+0x59e/0x858 rtnl_fdb_add+0x58a/0x928 rtnetlink_rcv_msg+0x5f8/0x8d8 netlink_rcv_skb+0x1f2/0x408 netlink_unicast+0x570/0x790 netlink_sendmsg+0x752/0xbe0 sock_sendmsg+0xca/0x110 ____sys_sendmsg+0x510/0x6a8 ___sys_sendmsg+0x12a/0x180 __sys_sendmsg+0xe6/0x168 __do_sys_socketcall+0x3c8/0x468 do_syscall+0x22c/0x328 __do_syscall+0x94/0xf0 system_call+0x82/0xb0 Freed by task 540: kasan_save_stack+0x40/0x68 kasan_set_track+0x36/0x48 kasan_save_free_info+0x4c/0x68 ____kasan_slab_free+0x14e/0x1a8 __kasan_slab_free+0x24/0x30 __kmem_cache_free+0x168/0x338 qeth_l2_br2dev_worker+0x154/0x6b0 process_one_work+0x76e/0x1128 worker_thread+0x184/0x1098 kthread+0x26a/0x310 __ret_from_fork+0x8a/0xe8 ret_from_fork+0xa/0x40 Last potentially related work creation: kasan_save_stack+0x40/0x68 __kasan_record_aux_stack+0xbe/0xd0 insert_work+0x56/0x2e8 __queue_work+0x4ce/0xd10 queue_work_on+0xf4/0x100 qeth_l2_switchdev_event+0x520/0x738 atomic_notifier_call_chain+0x9c/0xf8 br_switchdev_fdb_notify+0xf4/0x110 fdb_notify+0x122/0x180 fdb_add_entry.constprop.0.isra.0+0x312/0x558 br_fdb_add+0x59e/0x858 rtnl_fdb_add+0x58a/0x928 rtnetlink_rcv_msg+0x5f8/0x8d8 netlink_rcv_skb+0x1f2/0x408 netlink_unicast+0x570/0x790 netlink_sendmsg+0x752/0xbe0 sock_sendmsg+0xca/0x110 ____sys_sendmsg+0x510/0x6a8 ___sys_sendmsg+0x12a/0x180 __sys_sendmsg+0xe6/0x168 __do_sys_socketcall+0x3c8/0x468 do_syscall+0x22c/0x328 __do_syscall+0x94/0xf0 system_call+0x82/0xb0 Second to last potentially related work creation: kasan_save_stack+0x40/0x68 __kasan_record_aux_stack+0xbe/0xd0 kvfree_call_rcu+0xb2/0x760 kernfs_unlink_open_file+0x348/0x430 kernfs_fop_release+0xc2/0x320 __fput+0x1ae/0x768 task_work_run+0x1bc/0x298 exit_to_user_mode_prepare+0x1a0/0x1a8 __do_syscall+0x94/0xf0 system_call+0x82/0xb0 The buggy address belongs to the object at 00000000fdcea400 which belongs to the cache kmalloc-96 of size 96 The buggy address is located 64 bytes inside of 96-byte region [00000000fdcea400, 00000000fdcea460) The buggy address belongs to the physical page: page:000000005a9c26e8 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xfdcea flags: 0x3ffff00000000200(slab|node=0|zone=1|lastcpupid=0x1ffff) raw: 3ffff00000000200 0000000000000000 0000000100000122 000000008008cc00 raw: 0000000000000000 0020004100000000 ffffffff00000001 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: 00000000fdcea300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc 00000000fdcea380: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc >00000000fdcea400: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ^ 00000000fdcea480: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc 00000000fdcea500: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc ================================================================== Fixes: f7936b7b2663 ("s390/qeth: Update MACs of LEARNING_SYNC device") Reported-by: Thorsten Winkler Signed-off-by: Alexandra Winter Reviewed-by: Wenjia Zhang Reviewed-by: Thorsten Winkler Link: https://lore.kernel.org/r/20221207105304.20494-1-wintera@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l2_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9dc935886e9f..c6ded3fdd715 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -758,7 +758,6 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) struct list_head *iter; int err = 0; - kfree(br2dev_event_work); QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event); QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr)); @@ -815,6 +814,7 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) dev_put(brdev); dev_put(lsyncdev); dev_put(dstdev); + kfree(br2dev_event_work); } static int qeth_l2_br2dev_queue_work(struct net_device *brdev, From f8bac7f9fdb0017b32157957ffffd490f95faa07 Mon Sep 17 00:00:00 2001 From: "Radu Nicolae Pirea (OSS)" Date: Wed, 7 Dec 2022 15:23:47 +0200 Subject: [PATCH 945/963] net: dsa: sja1105: avoid out of bounds access in sja1105_init_l2_policing() The SJA1105 family has 45 L2 policing table entries (SJA1105_MAX_L2_POLICING_COUNT) and SJA1110 has 110 (SJA1110_MAX_L2_POLICING_COUNT). Keeping the table structure but accounting for the difference in port count (5 in SJA1105 vs 10 in SJA1110) does not fully explain the difference. Rather, the SJA1110 also has L2 ingress policers for multicast traffic. If a packet is classified as multicast, it will be processed by the policer index 99 + SRCPORT. The sja1105_init_l2_policing() function initializes all L2 policers such that they don't interfere with normal packet reception by default. To have a common code between SJA1105 and SJA1110, the index of the multicast policer for the port is calculated because it's an index that is out of bounds for SJA1105 but in bounds for SJA1110, and a bounds check is performed. The code fails to do the proper thing when determining what to do with the multicast policer of port 0 on SJA1105 (ds->num_ports = 5). The "mcast" index will be equal to 45, which is also equal to table->ops->max_entry_count (SJA1105_MAX_L2_POLICING_COUNT). So it passes through the check. But at the same time, SJA1105 doesn't have multicast policers. So the code programs the SHARINDX field of an out-of-bounds element in the L2 Policing table of the static config. The comparison between index 45 and 45 entries should have determined the code to not access this policer index on SJA1105, since its memory wasn't even allocated. With enough bad luck, the out-of-bounds write could even overwrite other valid kernel data, but in this case, the issue was detected using KASAN. Kernel log: sja1105 spi5.0: Probed switch chip: SJA1105Q ================================================================== BUG: KASAN: slab-out-of-bounds in sja1105_setup+0x1cbc/0x2340 Write of size 8 at addr ffffff880bd57708 by task kworker/u8:0/8 ... Workqueue: events_unbound deferred_probe_work_func Call trace: ... sja1105_setup+0x1cbc/0x2340 dsa_register_switch+0x1284/0x18d0 sja1105_probe+0x748/0x840 ... Allocated by task 8: ... sja1105_setup+0x1bcc/0x2340 dsa_register_switch+0x1284/0x18d0 sja1105_probe+0x748/0x840 ... Fixes: 38fbe91f2287 ("net: dsa: sja1105: configure the multicast policers, if present") CC: stable@vger.kernel.org # 5.15+ Signed-off-by: Radu Nicolae Pirea (OSS) Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221207132347.38698-1-radu-nicolae.pirea@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/sja1105/sja1105_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 412666111b0c..b70dcf32a26d 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1038,7 +1038,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) policing[bcast].sharindx = port; /* Only SJA1110 has multicast policers */ - if (mcast <= table->ops->max_entry_count) + if (mcast < table->ops->max_entry_count) policing[mcast].sharindx = port; } From fbf8321238bac04368f57af572e05a9c01347a0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 7 Dec 2022 16:53:15 -1000 Subject: [PATCH 946/963] memcg: Fix possible use-after-free in memcg_write_event_control() memcg_write_event_control() accesses the dentry->d_name of the specified control fd to route the write call. As a cgroup interface file can't be renamed, it's safe to access d_name as long as the specified file is a regular cgroup file. Also, as these cgroup interface files can't be removed before the directory, it's safe to access the parent too. Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a call to __file_cft() which verified that the specified file is a regular cgroupfs file before further accesses. The cftype pointer returned from __file_cft() was no longer necessary and the commit inadvertently dropped the file type check with it allowing any file to slip through. With the invarients broken, the d_name and parent accesses can now race against renames and removals of arbitrary files and cause use-after-free's. Fix the bug by resurrecting the file type check in __file_cft(). Now that cgroupfs is implemented through kernfs, checking the file operations needs to go through a layer of indirection. Instead, let's check the superblock and dentry type. Signed-off-by: Tejun Heo Fixes: 347c4a874710 ("memcg: remove cgroup_event->cft") Cc: stable@kernel.org # v3.14+ Reported-by: Jann Horn Acked-by: Johannes Weiner Acked-by: Roman Gushchin Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup-internal.h | 1 - mm/memcontrol.c | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..2b7d077de7ef 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index fd4020835ec6..367b0a42ada9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -167,7 +167,6 @@ struct cgroup_mgctx { extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a1a35c12635e..266a1ab05434 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4885,6 +4886,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, if (ret < 0) goto out_put_cfile; + /* + * The control file must be a regular cgroup1 file. As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows @@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. */ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name; if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) From 521dca14044450e292714d2bdafabd2b92e97ece Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 5 Dec 2022 09:28:33 -0800 Subject: [PATCH 947/963] ANDROID: clang: update to 16.0.1 Bug: 261223935 Change-Id: I2127dcf4f7ca678f14e920dac667758457a4b270 Signed-off-by: Nick Desaulniers --- build.config.constants | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.config.constants b/build.config.constants index 08567feef137..3ac4d70578ec 100644 --- a/build.config.constants +++ b/build.config.constants @@ -1,2 +1,2 @@ BRANCH=android-mainline -CLANG_VERSION=r468909 +CLANG_VERSION=r475365 From 40f2432b53a01b6d5e3a9057f1d5c406930e1360 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 7 Dec 2022 15:24:32 +0100 Subject: [PATCH 948/963] Revert "HID: logitech-hidpp: Remove special-casing of Bluetooth devices" This reverts commit 8544c812e43ab7bdf40458411b83987b8cba924d. We need to revert commit 532223c8ac57 ("HID: logitech-hidpp: Enable HID++ for all the Logitech Bluetooth devices") because that commit might make hid-logitech-hidpp bind on mice that are not well enough supported by hid-logitech-hidpp, and the end result is that the probe of those mice is now returning -ENODEV, leaving the end user with a dead mouse. Given that commit 8544c812e43a ("HID: logitech-hidpp: Remove special-casing of Bluetooth devices") is a direct dependency of 532223c8ac57, revert it too. Note that this also adapt according to commit 908d325e1665 ("HID: logitech-hidpp: Detect hi-res scrolling support") to re-add support of the devices that were removed from that commit too. I have locally an MX Master and I tested this device with that revert, ensuring we still have high-res scrolling. Reported-by: Rafael J . Wysocki Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 71a9c258a20b..e9c7aa4f293b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4367,6 +4367,15 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX5500 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, + { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, + { /* MX Master mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, + { /* MX Master 3 mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) }, { /* And try to enable HID++ for all the Logitech Bluetooth devices */ HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) }, From a9d9e46c755a189ccb44d91b8cf737742a975de8 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 7 Dec 2022 15:24:33 +0100 Subject: [PATCH 949/963] Revert "HID: logitech-hidpp: Enable HID++ for all the Logitech Bluetooth devices" This reverts commit 532223c8ac57605a10e46dc0ab23dcf01c9acb43. As reported in [0], hid-logitech-hidpp now binds on all bluetooth mice, but there are corner cases where hid-logitech-hidpp just gives up on the mouse. This leads the end user with a dead mouse. Given that we are at -rc8, we are definitively too late to find a proper fix. We already identified 2 issues less than 24 hours after the bug report. One in that ->match() was never designed to be used anywhere else than in hid-generic, and the other that hid-logitech-hidpp has corner cases where it gives up on devices it is not supposed to. So we have no choice but postpone this patch to the next kernel release. [0] https://lore.kernel.org/linux-input/CAJZ5v0g-_o4AqMgNwihCb0jrwrcJZfRrX=jv8aH54WNKO7QB8A@mail.gmail.com/ Reported-by: Rafael J . Wysocki Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index e9c7aa4f293b..8a2aac18dcc5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4269,21 +4269,6 @@ static void hidpp_remove(struct hid_device *hdev) mutex_destroy(&hidpp->send_mutex); } -static const struct hid_device_id unhandled_hidpp_devices[] = { - /* Logitech Harmony Adapter for PS3, handled in hid-sony */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) }, - /* Handled in hid-generic */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD) }, - {} -}; - -static bool hidpp_match(struct hid_device *hdev, - bool ignore_special_driver) -{ - /* Refuse to handle devices handled by other HID drivers */ - return !hid_match_id(hdev, unhandled_hidpp_devices); -} - #define LDJ_DEVICE(product) \ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \ USB_VENDOR_ID_LOGITECH, (product)) @@ -4376,9 +4361,6 @@ static const struct hid_device_id hidpp_devices[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, { /* MX Master 3 mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) }, - - { /* And try to enable HID++ for all the Logitech Bluetooth devices */ - HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) }, {} }; @@ -4392,7 +4374,6 @@ static const struct hid_usage_id hidpp_usages[] = { static struct hid_driver hidpp_driver = { .name = "logitech-hidpp-device", .id_table = hidpp_devices, - .match = hidpp_match, .report_fixup = hidpp_report_fixup, .probe = hidpp_probe, .remove = hidpp_remove, From 13d2b9ab3ee832c44ffd9f345526ffd6812ac7c3 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Thu, 8 Dec 2022 11:58:46 -0800 Subject: [PATCH 950/963] ANDROID: consolidate.fragment: Remove dead options Remove dead options from consolidate.fragment. These symbols no longer exist. Bug: 261837905 Change-Id: Ieb6bd0d033e7725560e3a0268f19f598771d1e18 Signed-off-by: Elliot Berman --- arch/arm64/configs/consolidate.fragment | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/configs/consolidate.fragment b/arch/arm64/configs/consolidate.fragment index 5bb0e45a50f3..8fb5be141b09 100644 --- a/arch/arm64/configs/consolidate.fragment +++ b/arch/arm64/configs/consolidate.fragment @@ -3,7 +3,6 @@ # CONFIG_BITFIELD_KUNIT is not set # CONFIG_BITS_TEST is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1 CONFIG_CMDLINE="kasan.stacktrace=off stack_depot_disable=off page_owner=on no_hash_pointers panic_on_taint=0x20" CONFIG_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN is not set @@ -55,7 +54,6 @@ CONFIG_TASKS_RUDE_RCU=y # CONFIG_TEST_BPF is not set # CONFIG_TEST_FIRMWARE is not set # CONFIG_TEST_FREE_PAGES is not set -# CONFIG_TEST_HASH is not set # CONFIG_TEST_HEXDUMP is not set # CONFIG_TEST_IDA is not set # CONFIG_TEST_KMOD is not set @@ -65,7 +63,6 @@ CONFIG_TASKS_RUDE_RCU=y # CONFIG_TEST_MEMCAT_P is not set # CONFIG_TEST_MEMINIT is not set # CONFIG_TEST_MIN_HEAP is not set -# CONFIG_TEST_OVERFLOW is not set # CONFIG_TEST_PRINTF is not set # CONFIG_TEST_RHASHTABLE is not set # CONFIG_TEST_SORT is not set From d0da01fd61ec24fae56f1fa8379425855618d7e9 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Mon, 21 Nov 2022 16:11:21 -0800 Subject: [PATCH 951/963] ANDROID: GKI: Add 64-bit RISC-V config Add a "fake" GKI for RISC-V based on the arm64 one. This implementation simply symlinks the arm64 gki_defconfig and merges in two other configs: - arch/riscv/configs/64-bit.config This is an upstream file which enables RISC-V for 64-bit. - arch/riscv/configs/gki.config This is a workaround file which enables things the ARM64 architecture pre-selects, such as GPIOLIB. Without enabling these options, some drivers are incorrectly dropped from the gki_defconfig. Bug: 260012551 Signed-off-by: Alistair Delva Change-Id: Ifad59ed483bdbb736e905eefc5107328155a361b --- BUILD.bazel | 3 +++ arch/riscv/configs/gki.config | 2 ++ arch/riscv/configs/gki_defconfig | 1 + build.config.gki.riscv64 | 26 ++++++++++++++++++++++++++ build.config.riscv64 | 14 ++++++++++++++ 5 files changed, 46 insertions(+) create mode 100644 arch/riscv/configs/gki.config create mode 120000 arch/riscv/configs/gki_defconfig create mode 100644 build.config.gki.riscv64 create mode 100644 build.config.riscv64 diff --git a/BUILD.bazel b/BUILD.bazel index ed296689b215..82bc747fc41c 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -22,6 +22,9 @@ define_common_kernels(target_configs = { "kernel_aarch64_debug": { "module_implicit_outs": COMMON_GKI_MODULES_LIST, }, + "kernel_riscv64": { + "module_implicit_outs": COMMON_GKI_MODULES_LIST, + }, "kernel_x86_64": { "kmi_symbol_list_strict_mode": False, "module_implicit_outs": COMMON_GKI_MODULES_LIST, diff --git a/arch/riscv/configs/gki.config b/arch/riscv/configs/gki.config new file mode 100644 index 000000000000..5558fef96af9 --- /dev/null +++ b/arch/riscv/configs/gki.config @@ -0,0 +1,2 @@ +CONFIG_CPU_FREQ=y +CONFIG_GPIOLIB=y diff --git a/arch/riscv/configs/gki_defconfig b/arch/riscv/configs/gki_defconfig new file mode 120000 index 000000000000..3388ea403df4 --- /dev/null +++ b/arch/riscv/configs/gki_defconfig @@ -0,0 +1 @@ +../../arm64/configs/gki_defconfig \ No newline at end of file diff --git a/build.config.gki.riscv64 b/build.config.gki.riscv64 new file mode 100644 index 000000000000..0eb01c4975de --- /dev/null +++ b/build.config.gki.riscv64 @@ -0,0 +1,26 @@ +. ${ROOT_DIR}/${KERNEL_DIR}/build.config.common +. ${ROOT_DIR}/${KERNEL_DIR}/build.config.riscv64 +. ${ROOT_DIR}/${KERNEL_DIR}/build.config.gki + +MAKE_GOALS="${MAKE_GOALS} +Image.lz4 +Image.gz +" + +FILES="${FILES} +arch/riscv/boot/Image.lz4 +arch/riscv/boot/Image.gz +" + +BUILD_SYSTEM_DLKM=1 +MODULES_LIST=${ROOT_DIR}/${KERNEL_DIR}/android/gki_system_dlkm_modules + +BUILD_GKI_CERTIFICATION_TOOLS=1 + +BUILD_GKI_ARTIFACTS=1 +BUILD_GKI_BOOT_IMG_SIZE=67108864 +BUILD_GKI_BOOT_IMG_GZ_SIZE=47185920 +BUILD_GKI_BOOT_IMG_LZ4_SIZE=53477376 + +PRE_DEFCONFIG_CMDS="mkdir -p \${OUT_DIR}/arch/riscv/configs/ && cat ${ROOT_DIR}/${KERNEL_DIR}/arch/riscv/configs/gki_defconfig ${ROOT_DIR}/${KERNEL_DIR}/arch/riscv/configs/64-bit.config ${ROOT_DIR}/${KERNEL_DIR}/arch/riscv/configs/gki.config > \${OUT_DIR}/arch/riscv/configs/${DEFCONFIG};" +POST_DEFCONFIG_CMDS="" diff --git a/build.config.riscv64 b/build.config.riscv64 new file mode 100644 index 000000000000..0ef054b61a84 --- /dev/null +++ b/build.config.riscv64 @@ -0,0 +1,14 @@ +ARCH=riscv +MAKE_GOALS=" +Image +modules +" + +FILES=" +arch/riscv/boot/Image +vmlinux +System.map +vmlinux.symvers +modules.builtin +modules.builtin.modinfo +" From 8d1e8ba1cab36bb0a400e05fa908f2b80ec4391a Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Wed, 7 Dec 2022 11:11:05 -0800 Subject: [PATCH 952/963] ANDROID: qki: Enable ttyMSM0 console in consolidate.fragment Enable ttyMSM0 in consolidated.fragment. Bug: 223797063 Change-Id: I01dda3677aea2990d8adbf5a0ef6aeeec346bc29 Signed-off-by: Elliot Berman --- arch/arm64/configs/consolidate.fragment | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/configs/consolidate.fragment b/arch/arm64/configs/consolidate.fragment index 8fb5be141b09..18cc65714dc1 100644 --- a/arch/arm64/configs/consolidate.fragment +++ b/arch/arm64/configs/consolidate.fragment @@ -3,7 +3,7 @@ # CONFIG_BITFIELD_KUNIT is not set # CONFIG_BITS_TEST is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y -CONFIG_CMDLINE="kasan.stacktrace=off stack_depot_disable=off page_owner=on no_hash_pointers panic_on_taint=0x20" +CONFIG_CMDLINE="console=ttyMSM0,115200n8 kasan.stacktrace=off stack_depot_disable=off page_owner=on no_hash_pointers panic_on_taint=0x20" CONFIG_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK_AUTO_SCAN is not set CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y From 93e1118c7934f6bb1ec9b03ada3fb7f2a1763a84 Mon Sep 17 00:00:00 2001 From: Alistair Delva Date: Thu, 8 Dec 2022 13:20:51 -0800 Subject: [PATCH 953/963] ANDROID: kleaf: add DDK definitions for risc64 Bug: 260012551 Signed-off-by: Alistair Delva Change-Id: I6d6e90f927796d1355af3ad181624b7abd876a9a --- BUILD.bazel | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 82bc747fc41c..c89e71f59c41 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -379,6 +379,15 @@ ddk_headers( visibility = ["//visibility:public"], ) +ddk_headers( + name = "all_headers_riscv64", + hdrs = [":all_headers_allowlist_riscv64"] + select({ + "//build/kernel/kleaf:allow_ddk_unsafe_headers_set": [":all_headers_unsafe"], + "//conditions:default": [], + }), + visibility = ["//visibility:public"], +) + ddk_headers( name = "all_headers_x86_64", hdrs = [":all_headers_allowlist_x86_64"] + select({ @@ -429,6 +438,24 @@ ddk_headers( visibility = ["//visibility:private"], ) +ddk_headers( + name = "all_headers_allowlist_riscv64", + hdrs = [ + ":all_headers_allowlist_common_globs", + ":all_headers_allowlist_riscv64_globs", + ], + # The list of include directories where source files can #include headers + # from. In other words, these are the `-I` option to the C compiler. + # These are prepended to LINUXINCLUDE. + linux_includes = [ + "arch/riscv/include", + "arch/riscv/include/uapi", + "include", + "include/uapi", + ], + visibility = ["//visibility:private"], +) + ddk_headers( name = "all_headers_allowlist_x86_64", hdrs = [ @@ -453,6 +480,13 @@ ddk_headers( # These are separate filegroup targets so the all_headers_allowlist_* are # more friendly to batch BUILD file update tools like buildozer. +# globs() for arm64 only +filegroup( + name = "all_headers_allowlist_aarch64_globs", + srcs = glob(["arch/arm64/include/**/*.h"]), + visibility = ["//visibility:private"], +) + # globs() for arm only filegroup( name = "all_headers_allowlist_arm_globs", @@ -460,10 +494,10 @@ filegroup( visibility = ["//visibility:private"], ) -# globs() for arm64 only +# globs() for riscv64 only filegroup( - name = "all_headers_allowlist_aarch64_globs", - srcs = glob(["arch/arm64/include/**/*.h"]), + name = "all_headers_allowlist_riscv64_globs", + srcs = glob(["arch/riscv/include/**/*.h"]), visibility = ["//visibility:private"], ) From 38f1d4aefdac30600698ab07cb1e41fca5851f7a Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 1 Dec 2022 18:45:40 +0200 Subject: [PATCH 954/963] mailmap: update Matti Vaittinen's email address The email backend used by ROHM keeps labeling patches as spam. This can result in missing the patches. Switch my mail address from a company mail to a personal one. Link: https://lkml.kernel.org/r/8f4498b66fedcbded37b3b87e0c516e659f8f583.1669912977.git.mazziesaccount@gmail.com Signed-off-by: Matti Vaittinen Suggested-by: Krzysztof Kozlowski Cc: Anup Patel Cc: Arnd Bergmann Cc: Atish Patra Cc: Baolin Wang Cc: Ben Widawsky Cc: Bjorn Andersson Cc: Christian Brauner Cc: Colin Ian King Cc: Kirill Tkhai Cc: Qais Yousef Cc: Vasily Averin Signed-off-by: Andrew Morton --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index aec51726551f..315e2207439a 100644 --- a/.mailmap +++ b/.mailmap @@ -287,6 +287,7 @@ Matthew Wilcox Matthew Wilcox Matthias Fuchs Matthieu CASTET +Matti Vaittinen Matt Ranostay Matt Ranostay Matthew Ranostay Matt Ranostay From f5ad5083404bb56c9de777dccb68c6672ef6487e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 2 Dec 2022 17:27:24 +0100 Subject: [PATCH 955/963] mm: do not BUG_ON missing brk mapping, because userspace can unmap it The following program will trigger the BUG_ON that this patch removes, because the user can munmap() mm->brk: #include #include #include #include static void *brk_now(void) { return (void *)syscall(SYS_brk, 0); } static void brk_set(void *b) { assert(syscall(SYS_brk, b) != -1); } int main(int argc, char *argv[]) { void *b = brk_now(); brk_set(b + 4096); assert(munmap(b - 4096, 4096 * 2) == 0); brk_set(b); return 0; } Compile that with musl, since glibc actually uses brk(), and then execute it, and it'll hit this splat: kernel BUG at mm/mmap.c:229! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 12 PID: 1379 Comm: a.out Tainted: G S U 6.1.0-rc7+ #419 RIP: 0010:__do_sys_brk+0x2fc/0x340 Code: 00 00 4c 89 ef e8 04 d3 fe ff eb 9a be 01 00 00 00 4c 89 ff e8 35 e0 fe ff e9 6e ff ff ff 4d 89 a7 20> RSP: 0018:ffff888140bc7eb0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000007e7000 RCX: ffff8881020fe000 RDX: ffff8881020fe001 RSI: ffff8881955c9b00 RDI: ffff8881955c9b08 RBP: 0000000000000000 R08: ffff8881955c9b00 R09: 00007ffc77844000 R10: 0000000000000000 R11: 0000000000000001 R12: 00000000007e8000 R13: 00000000007e8000 R14: 00000000007e7000 R15: ffff8881020fe000 FS: 0000000000604298(0000) GS:ffff88901f700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000603fe0 CR3: 000000015ba9a005 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x400678 Code: 10 4c 8d 41 08 4c 89 44 24 10 4c 8b 01 8b 4c 24 08 83 f9 2f 77 0a 4c 8d 4c 24 20 4c 01 c9 eb 05 48 8b> RSP: 002b:00007ffc77863890 EFLAGS: 00000212 ORIG_RAX: 000000000000000c RAX: ffffffffffffffda RBX: 000000000040031b RCX: 0000000000400678 RDX: 00000000004006a1 RSI: 00000000007e6000 RDI: 00000000007e7000 RBP: 00007ffc77863900 R08: 0000000000000000 R09: 00000000007e6000 R10: 00007ffc77863930 R11: 0000000000000212 R12: 00007ffc77863978 R13: 00007ffc77863988 R14: 0000000000000000 R15: 0000000000000000 Instead, just return the old brk value if the original mapping has been removed. [akpm@linux-foundation.org: fix changelog, per Liam] Link: https://lkml.kernel.org/r/20221202162724.2009-1-Jason@zx2c4.com Fixes: 2e7ce7d354f2 ("mm/mmap: change do_brk_flags() to expand existing VMA and add do_brk_munmap()") Signed-off-by: Jason A. Donenfeld Acked-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Reviewed-by: SeongJae Park Cc: Yu Zhao Cc: Catalin Marinas Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: Matthew Wilcox Cc: Sven Schnelle Cc: Will Deacon Cc: Jann Horn Signed-off-by: Andrew Morton --- mm/mmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 74a84eb33b90..0024e615d069 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -226,8 +226,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* Search one past newbrk */ mas_set(&mas, newbrk); brkvma = mas_find(&mas, oldbrk); - BUG_ON(brkvma == NULL); - if (brkvma->vm_start >= oldbrk) + if (!brkvma || brkvma->vm_start >= oldbrk) goto out; /* mapping intersects with an existing non-brk vma. */ /* * mm->brk must be protected by write mmap_lock. From de16d6e4a9fb13d07c88d0e57754348662f53b23 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Dec 2022 09:50:26 +0100 Subject: [PATCH 956/963] kselftests: cgroup: update kmem test precision tolerance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1813e51eece0 ("memcg: increase MEMCG_CHARGE_BATCH to 64") has changed the batch size while this test case has been left behind. This has led to a test failure reported by test bot: not ok 2 selftests: cgroup: test_kmem # exit=1 Update the tolerance for the pcp charges to reflect the MEMCG_CHARGE_BATCH change to fix this. [akpm@linux-foundation.org: update comments, per Roman] Link: https://lkml.kernel.org/r/Y4m8Unt6FhWKC6IH@dhcp22.suse.cz Fixes: 1813e51eece0a ("memcg: increase MEMCG_CHARGE_BATCH to 64") Signed-off-by: Michal Hocko Reported-by: kernel test robot Link: https://lore.kernel.org/oe-lkp/202212010958.c1053bd3-yujie.liu@intel.com Acked-by: Shakeel Butt Acked-by: Roman Gushchin Tested-by: Yujie Liu Cc: Eric Dumazet Cc: Feng Tang Cc: Johannes Weiner Cc: "Michal Koutný" Cc: Muchun Song Cc: Soheil Hassas Yeganeh Signed-off-by: Andrew Morton --- tools/testing/selftests/cgroup/test_kmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 22b31ebb3513..258ddc565deb 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -19,12 +19,12 @@ /* - * Memory cgroup charging is performed using percpu batches 32 pages + * Memory cgroup charging is performed using percpu batches 64 pages * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So * the maximum discrepancy between charge and vmstat entries is number - * of cpus multiplied by 32 pages. + * of cpus multiplied by 64 pages. */ -#define MAX_VMSTAT_ERROR (4096 * 32 * get_nprocs()) +#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs()) static int alloc_dcache(const char *cgroup, void *arg) From 44bcabd70cf1425b4243e02251c02b01638a8287 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 4 Dec 2022 16:51:50 -0800 Subject: [PATCH 957/963] tmpfs: fix data loss from failed fallocate Fix tmpfs data loss when the fallocate system call is interrupted by a signal, or fails for some other reason. The partial folio handling in shmem_undo_range() forgot to consider this unfalloc case, and was liable to erase or truncate out data which had already been committed earlier. It turns out that none of the partial folio handling there is appropriate for the unfalloc case, which just wants to proceed to removal of whole folios: which find_get_entries() provides, even when partially covered. Original patch by Rui Wang. Link: https://lore.kernel.org/linux-mm/33b85d82.7764.1842e9ab207.Coremail.chenguoqic@163.com/ Link: https://lkml.kernel.org/r/a5dac112-cf4b-7af-a33-f386e347fd38@google.com Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios") Signed-off-by: Hugh Dickins Reported-by: Guoqi Chen Link: https://lore.kernel.org/all/20221101032248.819360-1-kernel@hev.cc/ Cc: Rui Wang Cc: Huacai Chen Cc: Matthew Wilcox Cc: Vishal Moola (Oracle) Cc: [5.17+] Signed-off-by: Andrew Morton --- mm/shmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index c1d8b8a1aa3b..82911fefc2d5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -948,6 +948,15 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, index++; } + /* + * When undoing a failed fallocate, we want none of the partial folio + * zeroing and splitting below, but shall want to truncate the whole + * folio when !uptodate indicates that it was added by this fallocate, + * even when [lstart, lend] covers only a part of the folio. + */ + if (unfalloc) + goto whole_folios; + same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT); if (folio) { @@ -973,6 +982,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, folio_put(folio); } +whole_folios: + index = start; while (index < end) { cond_resched(); From 630dc25e43dacfb5af94cd41532e77c47ec1caff Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 5 Dec 2022 16:08:57 +0100 Subject: [PATCH 958/963] mm/swap: fix SWP_PFN_BITS with CONFIG_PHYS_ADDR_T_64BIT on 32bit We use "unsigned long" to store a PFN in the kernel and phys_addr_t to store a physical address. On a 64bit system, both are 64bit wide. However, on a 32bit system, the latter might be 64bit wide. This is, for example, the case on x86 with PAE: phys_addr_t and PTEs are 64bit wide, while "unsigned long" only spans 32bit. The current definition of SWP_PFN_BITS without MAX_PHYSMEM_BITS misses that case, and assumes that the maximum PFN is limited by an 32bit phys_addr_t. This implies, that SWP_PFN_BITS will currently only be able to cover 4 GiB - 1 on any 32bit system with 4k page size, which is wrong. Let's rely on the number of bits in phys_addr_t instead, but make sure to not exceed the maximum swap offset, to not make the BUILD_BUG_ON() in is_pfn_swap_entry() unhappy. Note that swp_entry_t is effectively an unsigned long and the maximum swap offset shares that value with the swap type. For example, on an 8 GiB x86 PAE system with a kernel config based on Debian 11.5 (-> CONFIG_FLATMEM=y, CONFIG_X86_PAE=y), we will currently fail removing migration entries (remove_migration_ptes()), because mm/page_vma_mapped.c:check_pte() will fail to identify a PFN match as swp_offset_pfn() wrongly masks off PFN bits. For example, split_huge_page_to_list()->...->remap_page() will leave migration entries in place and continue to unlock the page. Later, when we stumble over these migration entries (e.g., via /proc/self/pagemap), pfn_swap_entry_to_page() will BUG_ON() because these migration entries shouldn't exist anymore and the page was unlocked. [ 33.067591] kernel BUG at include/linux/swapops.h:497! [ 33.067597] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 33.067602] CPU: 3 PID: 742 Comm: cow Tainted: G E 6.1.0-rc8+ #16 [ 33.067605] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014 [ 33.067606] EIP: pagemap_pmd_range+0x644/0x650 [ 33.067612] Code: 00 00 00 00 66 90 89 ce b9 00 f0 ff ff e9 ff fb ff ff 89 d8 31 db e8 48 c6 52 00 e9 23 fb ff ff e8 61 83 56 00 e9 b6 fe ff ff <0f> 0b bf 00 f0 ff ff e9 38 fa ff ff 3e 8d 74 26 00 55 89 e5 57 31 [ 33.067615] EAX: ee394000 EBX: 00000002 ECX: ee394000 EDX: 00000000 [ 33.067617] ESI: c1b0ded4 EDI: 00024a00 EBP: c1b0ddb4 ESP: c1b0dd68 [ 33.067619] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010246 [ 33.067624] CR0: 80050033 CR2: b7a00000 CR3: 01bbbd20 CR4: 00350ef0 [ 33.067625] Call Trace: [ 33.067628] ? madvise_free_pte_range+0x720/0x720 [ 33.067632] ? smaps_pte_range+0x4b0/0x4b0 [ 33.067634] walk_pgd_range+0x325/0x720 [ 33.067637] ? mt_find+0x1d6/0x3a0 [ 33.067641] ? mt_find+0x1d6/0x3a0 [ 33.067643] __walk_page_range+0x164/0x170 [ 33.067646] walk_page_range+0xf9/0x170 [ 33.067648] ? __kmem_cache_alloc_node+0x2a8/0x340 [ 33.067653] pagemap_read+0x124/0x280 [ 33.067658] ? default_llseek+0x101/0x160 [ 33.067662] ? smaps_account+0x1d0/0x1d0 [ 33.067664] vfs_read+0x90/0x290 [ 33.067667] ? do_madvise.part.0+0x24b/0x390 [ 33.067669] ? debug_smp_processor_id+0x12/0x20 [ 33.067673] ksys_pread64+0x58/0x90 [ 33.067675] __ia32_sys_ia32_pread64+0x1b/0x20 [ 33.067680] __do_fast_syscall_32+0x4c/0xc0 [ 33.067683] do_fast_syscall_32+0x29/0x60 [ 33.067686] do_SYSENTER_32+0x15/0x20 [ 33.067689] entry_SYSENTER_32+0x98/0xf1 Decrease the indentation level of SWP_PFN_BITS and SWP_PFN_MASK to keep it readable and consistent. [david@redhat.com: rely on sizeof(phys_addr_t) and min_t() instead] Link: https://lkml.kernel.org/r/20221206105737.69478-1-david@redhat.com [david@redhat.com: use "int" for comparison, as we're only comparing numbers < 64] Link: https://lkml.kernel.org/r/1f157500-2676-7cef-a84e-9224ed64e540@redhat.com Link: https://lkml.kernel.org/r/20221205150857.167583-1-david@redhat.com Fixes: 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry") Signed-off-by: David Hildenbrand Acked-by: Peter Xu Reviewed-by: Yang Shi Cc: Hugh Dickins Cc: Andrea Arcangeli Signed-off-by: Andrew Morton --- include/linux/swapops.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 86b95ccb81bb..b07b277d6a16 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -33,11 +33,13 @@ * can use the extra bits to store other information besides PFN. */ #ifdef MAX_PHYSMEM_BITS -#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define SWP_PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) #else /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_BITS (BITS_PER_LONG - PAGE_SHIFT) +#define SWP_PFN_BITS min_t(int, \ + sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \ + SWP_TYPE_SHIFT) #endif /* MAX_PHYSMEM_BITS */ -#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) +#define SWP_PFN_MASK (BIT(SWP_PFN_BITS) - 1) /** * Migration swap entry specific bitfield definitions. Layout: From 6c28ca6485ddd7c5da171e479e3ebfbe661efc4d Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 5 Dec 2022 19:23:17 +0000 Subject: [PATCH 959/963] mmap: fix do_brk_flags() modifying obviously incorrect VMAs Add more sanity checks to the VMA that do_brk_flags() will expand. Ensure the VMA matches basic merge requirements within the function before calling can_vma_merge_after(). Drop the duplicate checks from vm_brk_flags() since they will be enforced later. The old code would expand file VMAs on brk(), which is functionally wrong and also dangerous in terms of locking because the brk() path isn't designed for file VMAs and therefore doesn't lock the file mapping. Checking can_vma_merge_after() ensures that new anonymous VMAs can't be merged into file VMAs. See https://lore.kernel.org/linux-mm/CAG48ez1tJZTOjS_FjRZhvtDA-STFmdw8PEizPDwMGFd_ui0Nrw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20221205192304.1957418-1-Liam.Howlett@oracle.com Fixes: 2e7ce7d354f2 ("mm/mmap: change do_brk_flags() to expand existing VMA and add do_brk_munmap()") Signed-off-by: Liam R. Howlett Suggested-by: Jann Horn Cc: Jason A. Donenfeld Cc: Matthew Wilcox Cc: SeongJae Park Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- mm/mmap.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 0024e615d069..1d9aac3aa899 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2948,9 +2948,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma, * Expand the existing vma if possible; Note that singular lists do not * occur after forking, so the expand will only happen on new VMAs. */ - if (vma && - (!vma->anon_vma || list_is_singular(&vma->anon_vma_chain)) && - ((vma->vm_flags & ~VM_SOFTDIRTY) == flags)) { + if (vma && vma->vm_end == addr && !vma_policy(vma) && + can_vma_merge_after(vma, flags, NULL, NULL, + addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) { mas_set_range(mas, vma->vm_start, addr + len - 1); if (mas_preallocate(mas, vma, GFP_KERNEL)) return -ENOMEM; @@ -3037,11 +3037,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) goto munmap_failed; vma = mas_prev(&mas, 0); - if (!vma || vma->vm_end != addr || vma_policy(vma) || - !can_vma_merge_after(vma, flags, NULL, NULL, - addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) - vma = NULL; - ret = do_brk_flags(&mas, vma, addr, len, flags); populate = ((mm->def_flags & VM_LOCKED) != 0); mmap_write_unlock(mm); From fcd0ccd836ffad73d98a66f6fea7b16f735ea920 Mon Sep 17 00:00:00 2001 From: John Starks Date: Tue, 6 Dec 2022 22:00:53 -0800 Subject: [PATCH 960/963] mm/gup: fix gup_pud_range() for dax For dax pud, pud_huge() returns true on x86. So the function works as long as hugetlb is configured. However, dax doesn't depend on hugetlb. Commit 414fd080d125 ("mm/gup: fix gup_pmd_range() for dax") fixed devmap-backed huge PMDs, but missed devmap-backed huge PUDs. Fix this as well. This fixes the below kernel panic: general protection fault, probably for non-canonical address 0x69e7c000cc478: 0000 [#1] SMP < snip > Call Trace: get_user_pages_fast+0x1f/0x40 iov_iter_get_pages+0xc6/0x3b0 ? mempool_alloc+0x5d/0x170 bio_iov_iter_get_pages+0x82/0x4e0 ? bvec_alloc+0x91/0xc0 ? bio_alloc_bioset+0x19a/0x2a0 blkdev_direct_IO+0x282/0x480 ? __io_complete_rw_common+0xc0/0xc0 ? filemap_range_has_page+0x82/0xc0 generic_file_direct_write+0x9d/0x1a0 ? inode_update_time+0x24/0x30 __generic_file_write_iter+0xbd/0x1e0 blkdev_write_iter+0xb4/0x150 ? io_import_iovec+0x8d/0x340 io_write+0xf9/0x300 io_issue_sqe+0x3c3/0x1d30 ? sysvec_reschedule_ipi+0x6c/0x80 __io_queue_sqe+0x33/0x240 ? fget+0x76/0xa0 io_submit_sqes+0xe6a/0x18d0 ? __fget_light+0xd1/0x100 __x64_sys_io_uring_enter+0x199/0x880 ? __context_tracking_enter+0x1f/0x70 ? irqentry_exit_to_user_mode+0x24/0x30 ? irqentry_exit+0x1d/0x30 ? __context_tracking_exit+0xe/0x70 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x61/0xcb RIP: 0033:0x7fc97c11a7be < snip > ---[ end trace 48b2e0e67debcaeb ]--- RIP: 0010:internal_get_user_pages_fast+0x340/0x990 < snip > Kernel panic - not syncing: Fatal exception Kernel Offset: disabled Link: https://lkml.kernel.org/r/1670392853-28252-1-git-send-email-ssengar@linux.microsoft.com Fixes: 414fd080d125 ("mm/gup: fix gup_pmd_range() for dax") Signed-off-by: John Starks Signed-off-by: Saurabh Sengar Cc: Jan Kara Cc: Yu Zhao Cc: Jason Gunthorpe Cc: John Hubbard Cc: David Hildenbrand Cc: Dan Williams Cc: Alistair Popple Cc: Signed-off-by: Andrew Morton --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index fe195d47de74..3b7bc2c1fd44 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2852,7 +2852,7 @@ static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned lo next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) return 0; - if (unlikely(pud_huge(pud))) { + if (unlikely(pud_huge(pud) || pud_devmap(pud))) { if (!gup_huge_pud(pud, pudp, addr, next, flags, pages, nr)) return 0; From a501788ab2603d97c41e8cda59cd74b72c29951f Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Thu, 8 Dec 2022 19:55:48 +0800 Subject: [PATCH 961/963] MAINTAINERS: update Muchun Song's email I'm moving to the @linux.dev account. Map my old addresses and update it to my new address. Link: https://lkml.kernel.org/r/20221208115548.85244-1-songmuchun@bytedance.com Signed-off-by: Muchun Song Signed-off-by: Andrew Morton --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 315e2207439a..f5bd014469d7 100644 --- a/.mailmap +++ b/.mailmap @@ -373,6 +373,8 @@ Ricardo Ribalda Roman Gushchin Roman Gushchin Roman Gushchin +Muchun Song +Muchun Song Ross Zwisler Rudolf Marek Rui Saraiva diff --git a/MAINTAINERS b/MAINTAINERS index 0e152a7afda8..93913c3825a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5299,7 +5299,7 @@ M: Johannes Weiner M: Michal Hocko M: Roman Gushchin M: Shakeel Butt -R: Muchun Song +R: Muchun Song L: cgroups@vger.kernel.org L: linux-mm@kvack.org S: Maintained @@ -9443,7 +9443,7 @@ F: drivers/net/ethernet/huawei/hinic/ HUGETLB SUBSYSTEM M: Mike Kravetz -M: Muchun Song +M: Muchun Song L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages From 4a7ba45b1a435e7097ca0f79a847d0949d0eb088 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 7 Dec 2022 16:53:15 -1000 Subject: [PATCH 962/963] memcg: fix possible use-after-free in memcg_write_event_control() memcg_write_event_control() accesses the dentry->d_name of the specified control fd to route the write call. As a cgroup interface file can't be renamed, it's safe to access d_name as long as the specified file is a regular cgroup file. Also, as these cgroup interface files can't be removed before the directory, it's safe to access the parent too. Prior to 347c4a874710 ("memcg: remove cgroup_event->cft"), there was a call to __file_cft() which verified that the specified file is a regular cgroupfs file before further accesses. The cftype pointer returned from __file_cft() was no longer necessary and the commit inadvertently dropped the file type check with it allowing any file to slip through. With the invarients broken, the d_name and parent accesses can now race against renames and removals of arbitrary files and cause use-after-free's. Fix the bug by resurrecting the file type check in __file_cft(). Now that cgroupfs is implemented through kernfs, checking the file operations needs to go through a layer of indirection. Instead, let's check the superblock and dentry type. Link: https://lkml.kernel.org/r/Y5FRm/cfcKPGzWwl@slm.duckdns.org Fixes: 347c4a874710 ("memcg: remove cgroup_event->cft") Signed-off-by: Tejun Heo Reported-by: Jann Horn Acked-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Linus Torvalds Cc: Michal Hocko Cc: Muchun Song Cc: Shakeel Butt Cc: [3.14+] Signed-off-by: Andrew Morton --- include/linux/cgroup.h | 1 + kernel/cgroup/cgroup-internal.h | 1 - mm/memcontrol.c | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..2b7d077de7ef 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index fd4020835ec6..367b0a42ada9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -167,7 +167,6 @@ struct cgroup_mgctx { extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a1a35c12635e..266a1ab05434 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4885,6 +4886,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, if (ret < 0) goto out_put_cfile; + /* + * The control file must be a regular cgroup1 file. As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows @@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. */ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name; if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) From 830b3c68c1fb1e9176028d02ef86f3cf76aa2476 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 11 Dec 2022 14:15:18 -0800 Subject: [PATCH 963/963] Linux 6.1 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0992f827888d..997b67722292 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION*