From 0b8b21c0b38fbcbe6b5fa783cc363eeec1f686e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 May 2022 09:57:35 +0200 Subject: [PATCH 001/143] clk: generalize devm_clk_get() a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit abae8e57e49aa75f6db76aa866c775721523908f ] Allow to add an exit hook to devm managed clocks. Also use clk_get_optional() in devm_clk_get_optional instead of open coding it. The generalisation will be used in the next commit to add some more devm_clk helpers. Reviewed-by: Jonathan Cameron Reviewed-by: Alexandru Ardelean Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220520075737.758761-3-u.kleine-koenig@pengutronix.de Signed-off-by: Stephen Boyd Stable-dep-of: 340cb392a038 ("memory: atmel-sdramc: Fix missing clk_disable_unprepare in atmel_ramc_probe()") Signed-off-by: Sasha Levin --- drivers/clk/clk-devres.c | 76 ++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index f9d5b7334341..c822f4ef1584 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -4,39 +4,71 @@ #include #include +struct devm_clk_state { + struct clk *clk; + void (*exit)(struct clk *clk); +}; + static void devm_clk_release(struct device *dev, void *res) { - clk_put(*(struct clk **)res); + struct devm_clk_state *state = *(struct devm_clk_state **)res; + + if (state->exit) + state->exit(state->clk); + + clk_put(state->clk); +} + +static struct clk *__devm_clk_get(struct device *dev, const char *id, + struct clk *(*get)(struct device *dev, const char *id), + int (*init)(struct clk *clk), + void (*exit)(struct clk *clk)) +{ + struct devm_clk_state *state; + struct clk *clk; + int ret; + + state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL); + if (!state) + return ERR_PTR(-ENOMEM); + + clk = get(dev, id); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); + goto err_clk_get; + } + + if (init) { + ret = init(clk); + if (ret) + goto err_clk_init; + } + + state->clk = clk; + state->exit = exit; + + devres_add(dev, state); + + return clk; + +err_clk_init: + + clk_put(clk); +err_clk_get: + + devres_free(state); + return ERR_PTR(ret); } struct clk *devm_clk_get(struct device *dev, const char *id) { - struct clk **ptr, *clk; - - ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - clk = clk_get(dev, id); - if (!IS_ERR(clk)) { - *ptr = clk; - devres_add(dev, ptr); - } else { - devres_free(ptr); - } - - return clk; + return __devm_clk_get(dev, id, clk_get, NULL, NULL); } EXPORT_SYMBOL(devm_clk_get); struct clk *devm_clk_get_optional(struct device *dev, const char *id) { - struct clk *clk = devm_clk_get(dev, id); - - if (clk == ERR_PTR(-ENOENT)) - return NULL; - - return clk; + return __devm_clk_get(dev, id, clk_get_optional, NULL, NULL); } EXPORT_SYMBOL(devm_clk_get_optional); From 935ec78de50e21ac516b3d24d978281511b8417a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 20 May 2022 09:57:36 +0200 Subject: [PATCH 002/143] clk: Provide new devm_clk helpers for prepared and enabled clocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7ef9651e9792b08eb310c6beb202cbc947f43cab ] When a driver keeps a clock prepared (or enabled) during the whole lifetime of the driver, these helpers allow to simplify the drivers. Reviewed-by: Jonathan Cameron Reviewed-by: Alexandru Ardelean Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220520075737.758761-4-u.kleine-koenig@pengutronix.de Signed-off-by: Stephen Boyd Stable-dep-of: 340cb392a038 ("memory: atmel-sdramc: Fix missing clk_disable_unprepare in atmel_ramc_probe()") Signed-off-by: Sasha Levin --- drivers/clk/clk-devres.c | 27 ++++++++++ include/linux/clk.h | 109 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index c822f4ef1584..43ccd20e0298 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -66,12 +66,39 @@ struct clk *devm_clk_get(struct device *dev, const char *id) } EXPORT_SYMBOL(devm_clk_get); +struct clk *devm_clk_get_prepared(struct device *dev, const char *id) +{ + return __devm_clk_get(dev, id, clk_get, clk_prepare, clk_unprepare); +} +EXPORT_SYMBOL_GPL(devm_clk_get_prepared); + +struct clk *devm_clk_get_enabled(struct device *dev, const char *id) +{ + return __devm_clk_get(dev, id, clk_get, + clk_prepare_enable, clk_disable_unprepare); +} +EXPORT_SYMBOL_GPL(devm_clk_get_enabled); + struct clk *devm_clk_get_optional(struct device *dev, const char *id) { return __devm_clk_get(dev, id, clk_get_optional, NULL, NULL); } EXPORT_SYMBOL(devm_clk_get_optional); +struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id) +{ + return __devm_clk_get(dev, id, clk_get_optional, + clk_prepare, clk_unprepare); +} +EXPORT_SYMBOL_GPL(devm_clk_get_optional_prepared); + +struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id) +{ + return __devm_clk_get(dev, id, clk_get_optional, + clk_prepare_enable, clk_disable_unprepare); +} +EXPORT_SYMBOL_GPL(devm_clk_get_optional_enabled); + struct clk_bulk_devres { struct clk_bulk_data *clks; int num_clks; diff --git a/include/linux/clk.h b/include/linux/clk.h index 7fd6a1febcf4..1814eabb7c20 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -418,6 +418,47 @@ int __must_check devm_clk_bulk_get_all(struct device *dev, */ struct clk *devm_clk_get(struct device *dev, const char *id); +/** + * devm_clk_get_prepared - devm_clk_get() + clk_prepare() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. (IOW, @id may be identical strings, but + * clk_get may return different clock producers depending on @dev.) + * + * The returned clk (if valid) is prepared. Drivers must however assume + * that the clock is not enabled. + * + * The clock will automatically be unprepared and freed when the device + * is unbound from the bus. + */ +struct clk *devm_clk_get_prepared(struct device *dev, const char *id); + +/** + * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. (IOW, @id may be identical strings, but + * clk_get may return different clock producers depending on @dev.) + * + * The returned clk (if valid) is prepared and enabled. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_enabled(struct device *dev, const char *id); + /** * devm_clk_get_optional - lookup and obtain a managed reference to an optional * clock producer. @@ -429,6 +470,50 @@ struct clk *devm_clk_get(struct device *dev, const char *id); */ struct clk *devm_clk_get_optional(struct device *dev, const char *id); +/** + * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_prepared(). + * + * The returned clk (if valid) is prepared. Drivers must however + * assume that the clock is not enabled. + * + * The clock will automatically be unprepared and freed when the + * device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id); + +/** + * devm_clk_get_optional_enabled - devm_clk_get_optional() + + * clk_prepare_enable() + * @dev: device for clock "consumer" + * @id: clock consumer ID + * + * Context: May sleep. + * + * Return: a struct clk corresponding to the clock producer, or + * valid IS_ERR() condition containing errno. The implementation + * uses @dev and @id to determine the clock consumer, and thereby + * the clock producer. If no such clk is found, it returns NULL + * which serves as a dummy clk. That's the only difference compared + * to devm_clk_get_enabled(). + * + * The returned clk (if valid) is prepared and enabled. + * + * The clock will automatically be disabled, unprepared and freed + * when the device is unbound from the bus. + */ +struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id); + /** * devm_get_clk_from_child - lookup and obtain a managed reference to a * clock producer from child node. @@ -773,12 +858,36 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id) return NULL; } +static inline struct clk *devm_clk_get_prepared(struct device *dev, + const char *id) +{ + return NULL; +} + +static inline struct clk *devm_clk_get_enabled(struct device *dev, + const char *id) +{ + return NULL; +} + static inline struct clk *devm_clk_get_optional(struct device *dev, const char *id) { return NULL; } +static inline struct clk *devm_clk_get_optional_prepared(struct device *dev, + const char *id) +{ + return NULL; +} + +static inline struct clk *devm_clk_get_optional_enabled(struct device *dev, + const char *id) +{ + return NULL; +} + static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks, struct clk_bulk_data *clks) { From 53f55d6e07c413be72dbeee134a7a061afa4524a Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 25 Nov 2022 15:37:57 +0800 Subject: [PATCH 003/143] memory: atmel-sdramc: Fix missing clk_disable_unprepare in atmel_ramc_probe() [ Upstream commit 340cb392a038cf70540a4cdf2e98a247c66b6df4 ] The clk_disable_unprepare() should be called in the error handling of caps->has_mpddr_clk, fix it by replacing devm_clk_get and clk_prepare_enable by devm_clk_get_enabled. Fixes: e81b6abebc87 ("memory: add a driver for atmel ram controllers") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221125073757.3535219-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/atmel-sdramc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/memory/atmel-sdramc.c b/drivers/memory/atmel-sdramc.c index 9c49d00c2a96..ea6e9e1eaf04 100644 --- a/drivers/memory/atmel-sdramc.c +++ b/drivers/memory/atmel-sdramc.c @@ -47,19 +47,17 @@ static int atmel_ramc_probe(struct platform_device *pdev) caps = of_device_get_match_data(&pdev->dev); if (caps->has_ddrck) { - clk = devm_clk_get(&pdev->dev, "ddrck"); + clk = devm_clk_get_enabled(&pdev->dev, "ddrck"); if (IS_ERR(clk)) return PTR_ERR(clk); - clk_prepare_enable(clk); } if (caps->has_mpddr_clk) { - clk = devm_clk_get(&pdev->dev, "mpddr"); + clk = devm_clk_get_enabled(&pdev->dev, "mpddr"); if (IS_ERR(clk)) { pr_err("AT91 RAMC: couldn't get mpddr clock\n"); return PTR_ERR(clk); } - clk_prepare_enable(clk); } return 0; From e57ea0c6ba7f9ba5944f2157861b99dec13303a4 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sat, 26 Nov 2022 12:49:11 +0800 Subject: [PATCH 004/143] memory: mvebu-devbus: Fix missing clk_disable_unprepare in mvebu_devbus_probe() [ Upstream commit cb8fd6f75775165390ededea8799b60d93d9fe3e ] The clk_disable_unprepare() should be called in the error handling of devbus_get_timing_params() and of_platform_populate(), fix it by replacing devm_clk_get and clk_prepare_enable by devm_clk_get_enabled. Fixes: e81b6abebc87 ("memory: add a driver for atmel ram controllers") Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221126044911.7226-1-cuigaosheng1@huawei.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/mvebu-devbus.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/memory/mvebu-devbus.c b/drivers/memory/mvebu-devbus.c index 8450638e8670..efc6c08db2b7 100644 --- a/drivers/memory/mvebu-devbus.c +++ b/drivers/memory/mvebu-devbus.c @@ -280,10 +280,9 @@ static int mvebu_devbus_probe(struct platform_device *pdev) if (IS_ERR(devbus->base)) return PTR_ERR(devbus->base); - clk = devm_clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); - clk_prepare_enable(clk); /* * Obtain clock period in picoseconds, From 2a3c3a01e2f02b864c6c35936248e1bbac975710 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 13:31:23 -0300 Subject: [PATCH 005/143] ARM: dts: imx6ul-pico-dwarf: Use 'clock-frequency' [ Upstream commit 94e2cf1e0db5b06c7a6ae0878c5cbec925819a8a ] 'clock_frequency' is not a valid property. Use the correct 'clock-frequency' instead. Fixes: 47246fafef84 ("ARM: dts: imx6ul-pico: Add support for the dwarf baseboard") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6ul-pico-dwarf.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts index 162dc259edc8..5a74c7f68eb6 100644 --- a/arch/arm/boot/dts/imx6ul-pico-dwarf.dts +++ b/arch/arm/boot/dts/imx6ul-pico-dwarf.dts @@ -32,7 +32,7 @@ sys_mclk: clock-sys-mclk { }; &i2c2 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; From 6805e392f508c5831efe4477678d52a6cab7e090 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 13:31:24 -0300 Subject: [PATCH 006/143] ARM: dts: imx7d-pico: Use 'clock-frequency' [ Upstream commit f4dd0845c4f1f5371f1e06fef0e4a1734a2db964 ] 'clock_frequency' is not a valid property. Use the correct 'clock-frequency' instead. Fixes: 8b646cfb84c3 ("ARM: dts: imx7d-pico: Add support for the dwarf baseboard") Fixes: 6418fd92417f ("ARM: dts: imx7d-pico: Add support for the nymph baseboard") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx7d-pico-dwarf.dts | 4 ++-- arch/arm/boot/dts/imx7d-pico-nymph.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-pico-dwarf.dts b/arch/arm/boot/dts/imx7d-pico-dwarf.dts index 5162fe227d1e..fdc10563f147 100644 --- a/arch/arm/boot/dts/imx7d-pico-dwarf.dts +++ b/arch/arm/boot/dts/imx7d-pico-dwarf.dts @@ -32,7 +32,7 @@ sys_mclk: clock-sys-mclk { }; &i2c1 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; @@ -52,7 +52,7 @@ pressure-sensor@60 { }; &i2c4 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-pico-nymph.dts b/arch/arm/boot/dts/imx7d-pico-nymph.dts index 104a85254adb..5afb1674e012 100644 --- a/arch/arm/boot/dts/imx7d-pico-nymph.dts +++ b/arch/arm/boot/dts/imx7d-pico-nymph.dts @@ -43,7 +43,7 @@ sys_mclk: clock-sys-mclk { }; &i2c1 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; status = "okay"; @@ -64,7 +64,7 @@ adc@52 { }; &i2c2 { - clock_frequency = <100000>; + clock-frequency = <100000>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; From cccb0aea9c3f9aa88e4c41330a9d23bcd036b123 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 21 Nov 2022 17:22:59 -0300 Subject: [PATCH 007/143] ARM: dts: imx6qdl-gw560x: Remove incorrect 'uart-has-rtscts' [ Upstream commit 9dfbc72256b5de608ad10989bcbafdbbd1ac8d4e ] The following build warning is seen when running: make dtbs_check DT_SCHEMA_FILES=fsl-imx-uart.yaml arch/arm/boot/dts/imx6dl-gw560x.dtb: serial@2020000: rts-gpios: False schema does not allow [[20, 1, 0]] From schema: Documentation/devicetree/bindings/serial/fsl-imx-uart.yaml The imx6qdl-gw560x board does not expose the UART RTS and CTS as native UART pins, so 'uart-has-rtscts' should not be used. Using 'uart-has-rtscts' with 'rts-gpios' is an invalid combination detected by serial.yaml. Fix the problem by removing the incorrect 'uart-has-rtscts' property. Fixes: b8a559feffb2 ("ARM: dts: imx: add Gateworks Ventana GW5600 support") Signed-off-by: Fabio Estevam Acked-by: Tim Harvey Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx6qdl-gw560x.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi index 093a219a77ae..f520e337698a 100644 --- a/arch/arm/boot/dts/imx6qdl-gw560x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw560x.dtsi @@ -634,7 +634,6 @@ &ssi1 { &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; - uart-has-rtscts; rts-gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; From 5c1dcedd5206c5233f2f7b3e82fe762c7a549180 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 2 Dec 2022 13:10:37 -0600 Subject: [PATCH 008/143] arm64: dts: imx8mm-beacon: Fix ecspi2 pinmux [ Upstream commit 5225ba9db112ec4ed67da5e4d8b72e618573955e ] Early hardware did not support hardware handshaking on the UART, but final production hardware did. When the hardware was updated the chip select was changed to facilitate hardware handshaking on UART3. Fix the ecspi2 pin mux to eliminate a pin conflict with UART3 and allow the EEPROM to operate again. Fixes: 4ce01ce36d77 ("arm64: dts: imx8mm-beacon: Enable RTS-CTS on UART3") Signed-off-by: Adam Ford Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi index 5667009aae13..674a0ab8a539 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-baseboard.dtsi @@ -70,7 +70,7 @@ sound { &ecspi2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_espi2>; - cs-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>; + cs-gpios = <&gpio5 13 GPIO_ACTIVE_LOW>; status = "okay"; eeprom@0 { @@ -187,7 +187,7 @@ pinctrl_espi2: espi2grp { MX8MM_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x82 MX8MM_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x82 MX8MM_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x82 - MX8MM_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x41 + MX8MM_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x41 >; }; From d775671dccf415d5e19581092270fbc52da6b06f Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Thu, 8 Dec 2022 17:54:03 +0100 Subject: [PATCH 009/143] ARM: imx: add missing of_node_put() [ Upstream commit 87b30c4b0efb6a194a7b8eac2568a3da520d905f ] Calling of_find_compatible_node() returns a node pointer with refcount incremented. Use of_node_put() on it when done. The patch fixes the same problem on different i.MX platforms. Fixes: 8b88f7ef31dde ("ARM: mx25: Retrieve IIM base from dt") Fixes: 94b2bec1b0e05 ("ARM: imx27: Retrieve the SYSCTRL base address from devicetree") Fixes: 3172225d45bd9 ("ARM: imx31: Retrieve the IIM base address from devicetree") Fixes: f68ea682d1da7 ("ARM: imx35: Retrieve the IIM base address from devicetree") Fixes: ee18a7154ee08 ("ARM: imx5: retrieve iim base from device tree") Signed-off-by: Dario Binacchi Reviewed-by: Fabio Estevam Reviewed-by: Martin Kaiser Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/mach-imx/cpu-imx25.c | 1 + arch/arm/mach-imx/cpu-imx27.c | 1 + arch/arm/mach-imx/cpu-imx31.c | 1 + arch/arm/mach-imx/cpu-imx35.c | 1 + arch/arm/mach-imx/cpu-imx5.c | 1 + 5 files changed, 5 insertions(+) diff --git a/arch/arm/mach-imx/cpu-imx25.c b/arch/arm/mach-imx/cpu-imx25.c index b2e1963f473d..2ee2d2813d57 100644 --- a/arch/arm/mach-imx/cpu-imx25.c +++ b/arch/arm/mach-imx/cpu-imx25.c @@ -23,6 +23,7 @@ static int mx25_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx25-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); rev = readl(iim_base + MXC_IIMSREV); iounmap(iim_base); diff --git a/arch/arm/mach-imx/cpu-imx27.c b/arch/arm/mach-imx/cpu-imx27.c index bf70e13bbe9e..1d2893908368 100644 --- a/arch/arm/mach-imx/cpu-imx27.c +++ b/arch/arm/mach-imx/cpu-imx27.c @@ -28,6 +28,7 @@ static int mx27_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx27-ccm"); ccm_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!ccm_base); /* * now we have access to the IO registers. As we need diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c index b9c24b851d1a..35c544924e50 100644 --- a/arch/arm/mach-imx/cpu-imx31.c +++ b/arch/arm/mach-imx/cpu-imx31.c @@ -39,6 +39,7 @@ static int mx31_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx31-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); /* read SREV register from IIM module */ diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c index 80e7d8ab9f1b..1fe75b39c2d9 100644 --- a/arch/arm/mach-imx/cpu-imx35.c +++ b/arch/arm/mach-imx/cpu-imx35.c @@ -21,6 +21,7 @@ static int mx35_read_cpu_rev(void) np = of_find_compatible_node(NULL, NULL, "fsl,imx35-iim"); iim_base = of_iomap(np, 0); + of_node_put(np); BUG_ON(!iim_base); rev = imx_readl(iim_base + MXC_IIMSREV); diff --git a/arch/arm/mach-imx/cpu-imx5.c b/arch/arm/mach-imx/cpu-imx5.c index ad56263778f9..a67c89bf155d 100644 --- a/arch/arm/mach-imx/cpu-imx5.c +++ b/arch/arm/mach-imx/cpu-imx5.c @@ -28,6 +28,7 @@ static u32 imx5_read_srev_reg(const char *compat) np = of_find_compatible_node(NULL, NULL, compat); iim_base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!iim_base); srev = readl(iim_base + IIM_SREV) & 0xff; From 7b4516ba56f1fcb13ffc91912f3074e28362228d Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 21:48:23 +0800 Subject: [PATCH 010/143] HID: intel_ish-hid: Add check for ishtp_dma_tx_map [ Upstream commit b3d40c3ec3dc4ad78017de6c3a38979f57aaaab8 ] As the kcalloc may return NULL pointer, it should be better to check the ishtp_dma_tx_map before use in order to avoid NULL pointer dereference. Fixes: 3703f53b99e4 ("HID: intel_ish-hid: ISH Transport layer") Signed-off-by: Jiasheng Jiang Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/intel-ish-hid/ishtp/dma-if.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/intel-ish-hid/ishtp/dma-if.c b/drivers/hid/intel-ish-hid/ishtp/dma-if.c index 40554c8daca0..00046cbfd4ed 100644 --- a/drivers/hid/intel-ish-hid/ishtp/dma-if.c +++ b/drivers/hid/intel-ish-hid/ishtp/dma-if.c @@ -104,6 +104,11 @@ void *ishtp_cl_get_dma_send_buf(struct ishtp_device *dev, int required_slots = (size / DMA_SLOT_SIZE) + 1 * (size % DMA_SLOT_SIZE != 0); + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return NULL; + } + spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (i = 0; i <= (dev->ishtp_dma_num_slots - required_slots); i++) { free = 1; @@ -150,6 +155,11 @@ void ishtp_cl_release_dma_acked_mem(struct ishtp_device *dev, return; } + if (!dev->ishtp_dma_tx_map) { + dev_err(dev->devc, "Fail to allocate Tx map\n"); + return; + } + i = (msg_addr - dev->ishtp_host_dma_tx_buf) / DMA_SLOT_SIZE; spin_lock_irqsave(&dev->ishtp_dma_tx_lock, flags); for (j = 0; j < acked_slots; j++) { From 329fbd260352a7b9a83781d8b8bd96f95844a51f Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 09:48:24 +0400 Subject: [PATCH 011/143] EDAC/highbank: Fix memory leak in highbank_mc_probe() [ Upstream commit e7a293658c20a7945014570e1921bf7d25d68a36 ] When devres_open_group() fails, it returns -ENOMEM without freeing memory allocated by edac_mc_alloc(). Call edac_mc_free() on the error handling path to avoid a memory leak. [ bp: Massage commit message. ] Fixes: a1b01edb2745 ("edac: add support for Calxeda highbank memory controller") Signed-off-by: Miaoqian Lin Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Andre Przywara Link: https://lore.kernel.org/r/20221229054825.1361993-1-linmq006@gmail.com Signed-off-by: Sasha Levin --- drivers/edac/highbank_mc_edac.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/edac/highbank_mc_edac.c b/drivers/edac/highbank_mc_edac.c index 61b76ec226af..19fba258ae10 100644 --- a/drivers/edac/highbank_mc_edac.c +++ b/drivers/edac/highbank_mc_edac.c @@ -174,8 +174,10 @@ static int highbank_mc_probe(struct platform_device *pdev) drvdata = mci->pvt_info; platform_set_drvdata(pdev, mci); - if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) - return -ENOMEM; + if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) { + res = -ENOMEM; + goto free; + } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) { @@ -243,6 +245,7 @@ static int highbank_mc_probe(struct platform_device *pdev) edac_mc_del_mc(&pdev->dev); err: devres_release_group(&pdev->dev, NULL); +free: edac_mc_free(mci); return res; } From e85df1db28dc93d3177f5770a8b2f2bb3b3eb863 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:20 +0000 Subject: [PATCH 012/143] firmware: arm_scmi: Harden shared memory access in fetch_response [ Upstream commit ad78b81a1077f7d956952cd8bdfe1e61504e3eb8 ] A misbheaving SCMI platform firmware could reply with out-of-spec messages, shorter than the mimimum size comprising a header and a status field. Harden shmem_fetch_response to properly truncate such a bad messages. Fixes: 5c8a47a5a91d ("firmware: arm_scmi: Make scmi core independent of the transport type") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-3-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/shmem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 0e3eaea5d852..415ef7df8fc3 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -58,10 +58,11 @@ u32 shmem_read_header(struct scmi_shared_mem __iomem *shmem) void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, struct scmi_xfer *xfer) { + size_t len = ioread32(&shmem->length); + xfer->hdr.status = ioread32(shmem->msg_payload); /* Skip the length of header and status in shmem area i.e 8 bytes */ - xfer->rx.len = min_t(size_t, xfer->rx.len, - ioread32(&shmem->length) - 8); + xfer->rx.len = min_t(size_t, xfer->rx.len, len > 8 ? len - 8 : 0); /* Take a copy to the rx buffer.. */ memcpy_fromio(xfer->rx.buf, shmem->msg_payload + 4, xfer->rx.len); From 6813d8ba7dac2de37670e04cbe25dd7071d612fd Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:38:21 +0000 Subject: [PATCH 013/143] firmware: arm_scmi: Harden shared memory access in fetch_notification [ Upstream commit 9bae076cd4e3e3c3dc185cae829d80b2dddec86e ] A misbheaving SCMI platform firmware could reply with out-of-spec notifications, shorter than the mimimum size comprising a header. Fixes: d5141f37c42e ("firmware: arm_scmi: Add notifications support in transport layer") Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222183823.518856-4-cristian.marussi@arm.com Signed-off-by: Sudeep Holla Signed-off-by: Sasha Levin --- drivers/firmware/arm_scmi/shmem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/shmem.c b/drivers/firmware/arm_scmi/shmem.c index 415ef7df8fc3..56a1f61aa3ff 100644 --- a/drivers/firmware/arm_scmi/shmem.c +++ b/drivers/firmware/arm_scmi/shmem.c @@ -71,8 +71,10 @@ void shmem_fetch_response(struct scmi_shared_mem __iomem *shmem, void shmem_fetch_notification(struct scmi_shared_mem __iomem *shmem, size_t max_len, struct scmi_xfer *xfer) { + size_t len = ioread32(&shmem->length); + /* Skip only the length of header in shmem area i.e 4 bytes */ - xfer->rx.len = min_t(size_t, max_len, ioread32(&shmem->length) - 4); + xfer->rx.len = min_t(size_t, max_len, len > 4 ? len - 4 : 0); /* Take a copy to the rx buffer.. */ memcpy_fromio(xfer->rx.buf, shmem->msg_payload, xfer->rx.len); From eab7a920379af8d44a5a3c73edb29544063e7a5e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 7 Jan 2023 16:47:41 +0900 Subject: [PATCH 014/143] tomoyo: fix broken dependency on *.conf.default [ Upstream commit eaf2213ba563b2d74a1f2c13a6b258273f689802 ] If *.conf.default is updated, builtin-policy.h should be rebuilt, but this does not work when compiled with O= option. [Without this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool make[1]: Leaving directory '/tmp' [With this commit] $ touch security/tomoyo/policy/exception_policy.conf.default $ make O=/tmp security/tomoyo/ make[1]: Entering directory '/tmp' GEN Makefile CALL /home/masahiro/ref/linux/scripts/checksyscalls.sh DESCEND objtool POLICY security/tomoyo/builtin-policy.h CC security/tomoyo/common.o AR security/tomoyo/built-in.a make[1]: Leaving directory '/tmp' $(srctree)/ is essential because $(wildcard ) does not follow VPATH. Fixes: f02dee2d148b ("tomoyo: Do not generate empty policy files") Signed-off-by: Masahiro Yamada Signed-off-by: Tetsuo Handa Signed-off-by: Sasha Levin --- security/tomoyo/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index cca5a3012fee..221eaadffb09 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -10,7 +10,7 @@ endef quiet_cmd_policy = POLICY $@ cmd_policy = ($(call do_policy,profile); $(call do_policy,exception_policy); $(call do_policy,domain_policy); $(call do_policy,manager); $(call do_policy,stat)) >$@ -$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(src)/policy/*.conf.default) FORCE +$(obj)/builtin-policy.h: $(wildcard $(obj)/policy/*.conf $(srctree)/$(src)/policy/*.conf.default) FORCE $(call if_changed,policy) $(obj)/common.o: $(obj)/builtin-policy.h From d66c1d4178c219b6e7d7a6f714e3e3656faccc36 Mon Sep 17 00:00:00 2001 From: Yonatan Nachum Date: Mon, 9 Jan 2023 13:37:11 +0000 Subject: [PATCH 015/143] RDMA/core: Fix ib block iterator counter overflow [ Upstream commit 0afec5e9cea732cb47014655685a2a47fb180c31 ] When registering a new DMA MR after selecting the best aligned page size for it, we iterate over the given sglist to split each entry to smaller, aligned to the selected page size, DMA blocks. In given circumstances where the sg entry and page size fit certain sizes and the sg entry is not aligned to the selected page size, the total size of the aligned pages we need to cover the sg entry is >= 4GB. Under this circumstances, while iterating page aligned blocks, the counter responsible for counting how much we advanced from the start of the sg entry is overflowed because its type is u32 and we pass 4GB in size. This can lead to an infinite loop inside the iterator function because the overflow prevents the counter to be larger than the size of the sg entry. Fix the presented problem by changing the advancement condition to eliminate overflow. Backtrace: [ 192.374329] efa_reg_user_mr_dmabuf [ 192.376783] efa_register_mr [ 192.382579] pgsz_bitmap 0xfffff000 rounddown 0x80000000 [ 192.386423] pg_sz [0x80000000] umem_length[0xc0000000] [ 192.392657] start 0x0 length 0xc0000000 params.page_shift 31 params.page_num 3 [ 192.399559] hp_cnt[3], pages_in_hp[524288] [ 192.403690] umem->sgt_append.sgt.nents[1] [ 192.407905] number entries: [1], pg_bit: [31] [ 192.411397] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.415601] biter->__sg_advance [665837568] sg_dma_len[3221225472] [ 192.419823] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.423976] biter->__sg_advance [2813321216] sg_dma_len[3221225472] [ 192.428243] biter->__sg_nents [1] biter->__sg [0000000008b0c5d8] [ 192.432397] biter->__sg_advance [665837568] sg_dma_len[3221225472] Fixes: a808273a495c ("RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks") Signed-off-by: Yonatan Nachum Link: https://lore.kernel.org/r/20230109133711.13678-1-ynachum@amazon.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/core/verbs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 5889639e90a1..5123be0ab02f 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2911,15 +2911,18 @@ EXPORT_SYMBOL(__rdma_block_iter_start); bool __rdma_block_iter_next(struct ib_block_iter *biter) { unsigned int block_offset; + unsigned int sg_delta; if (!biter->__sg_nents || !biter->__sg) return false; biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); - biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; + sg_delta = BIT_ULL(biter->__pg_bit) - block_offset; - if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { + if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) { + biter->__sg_advance += sg_delta; + } else { biter->__sg_advance = 0; biter->__sg = sg_next(biter->__sg); biter->__sg_nents--; From 73e5666bf30d74cbc33c5b55f1ab0346a07d99a8 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:11 -0500 Subject: [PATCH 016/143] IB/hfi1: Reject a zero-length user expected buffer [ Upstream commit 0a0a6e80472c98947d73c3d13bcd7d101895f55d ] A zero length user buffer makes no sense and the code does not handle it correctly. Instead, reject a zero length as invalid. Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547120.1472310.6362802432127399257.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b94fc7fd75a9..dd8ce2a62d2b 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -298,6 +298,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, if (!PAGE_ALIGNED(tinfo->vaddr)) return -EINVAL; + if (tinfo->length == 0) + return -EINVAL; tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL); if (!tidbuf) From ee474dd66e82ac7711ecf700706f4fb4d7b46afd Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:16 -0500 Subject: [PATCH 017/143] IB/hfi1: Reserve user expected TIDs [ Upstream commit ecf91551cdd2925ed6d9a9d99074fa5f67b90596 ] To avoid a race, reserve the number of user expected TIDs before setup. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328547636.1472310.7419712824785353905.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index dd8ce2a62d2b..1278cd55a480 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -324,16 +324,13 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, /* Find sets of physically contiguous pages */ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned); - /* - * We don't need to access this under a lock since tid_used is per - * process and the same process cannot be in hfi1_user_exp_rcv_clear() - * and hfi1_user_exp_rcv_setup() at the same time. - */ + /* Reserve the number of expected tids to be used. */ spin_lock(&fd->tid_lock); if (fd->tid_used + tidbuf->n_psets > fd->tid_limit) pageset_count = fd->tid_limit - fd->tid_used; else pageset_count = tidbuf->n_psets; + fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); if (!pageset_count) @@ -442,10 +439,11 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + /* adjust reserved tid_used to actual count */ + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count - tididx; + spin_unlock(&fd->tid_lock); if (tididx) { - spin_lock(&fd->tid_lock); - fd->tid_used += tididx; - spin_unlock(&fd->tid_lock); tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; From 6dd8136fd1b365aeae47690fe4bddbc3e5d82657 Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:21 -0500 Subject: [PATCH 018/143] IB/hfi1: Fix expected receive setup error exit issues [ Upstream commit e0c4a422f5246abefbf7c178ef99a1f2dc3c5f62 ] Fix three error exit issues in expected receive setup. Re-arrange error exits to increase readability. Issues and fixes: 1. Possible missed page unpin if tidlist copyout fails and not all pinned pages where made part of a TID. Fix: Unpin the unused pages. 2. Return success with unset return values tidcnt and length when no pages were pinned. Fix: Return -ENOSPC if no pages were pinned. 3. Return success with unset return values tidcnt and length when no rcvarray entries available. Fix: Return -ENOSPC if no rcvarray entries are available. Fixes: 7e7a436ecb6e ("staging/hfi1: Add TID entry program function body") Fixes: 97736f36dbeb ("IB/hfi1: Validate page aligned for a given virtual addres") Fixes: f404ca4c7ea8 ("IB/hfi1: Refactor hfi_user_exp_rcv_setup() IOCTL") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328548150.1472310.1492305874804187634.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 87 ++++++++++++++--------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 1278cd55a480..ba61d327a85d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -310,15 +310,14 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), GFP_KERNEL); if (!tidbuf->psets) { - kfree(tidbuf); - return -ENOMEM; + ret = -ENOMEM; + goto fail_release_mem; } pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { - kfree(tidbuf->psets); - kfree(tidbuf); - return pinned; + ret = (pinned < 0) ? pinned : -ENOSPC; + goto fail_unpin; } /* Find sets of physically contiguous pages */ @@ -333,14 +332,16 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, fd->tid_used += pageset_count; spin_unlock(&fd->tid_lock); - if (!pageset_count) - goto bail; + if (!pageset_count) { + ret = -ENOSPC; + goto fail_unreserve; + } ngroups = pageset_count / dd->rcv_entries.group_size; tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL); if (!tidlist) { ret = -ENOMEM; - goto nomem; + goto fail_unreserve; } tididx = 0; @@ -436,44 +437,60 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, } unlock: mutex_unlock(&uctxt->exp_mutex); -nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); + + /* fail if nothing was programmed, set error if none provided */ + if (tididx == 0) { + if (ret >= 0) + ret = -ENOSPC; + goto fail_unreserve; + } + /* adjust reserved tid_used to actual count */ spin_lock(&fd->tid_lock); fd->tid_used -= pageset_count - tididx; spin_unlock(&fd->tid_lock); - if (tididx) { - tinfo->tidcnt = tididx; - tinfo->length = mapped_pages * PAGE_SIZE; - if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), - tidlist, sizeof(tidlist[0]) * tididx)) { - /* - * On failure to copy to the user level, we need to undo - * everything done so far so we don't leak resources. - */ - tinfo->tidlist = (unsigned long)&tidlist; - hfi1_user_exp_rcv_clear(fd, tinfo); - tinfo->tidlist = 0; - ret = -EFAULT; - goto bail; - } + /* unpin all pages not covered by a TID */ + unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, + false); + + tinfo->tidcnt = tididx; + tinfo->length = mapped_pages * PAGE_SIZE; + + if (copy_to_user(u64_to_user_ptr(tinfo->tidlist), + tidlist, sizeof(tidlist[0]) * tididx)) { + ret = -EFAULT; + goto fail_unprogram; } - /* - * If not everything was mapped (due to insufficient RcvArray entries, - * for example), unpin all unmapped pages so we can pin them nex time. - */ - if (mapped_pages != pinned) - unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, - (pinned - mapped_pages), false); -bail: - kfree(tidbuf->psets); - kfree(tidlist); kfree(tidbuf->pages); + kfree(tidbuf->psets); kfree(tidbuf); - return ret > 0 ? 0 : ret; + kfree(tidlist); + return 0; + +fail_unprogram: + /* unprogram, unmap, and unpin all allocated TIDs */ + tinfo->tidlist = (unsigned long)tidlist; + hfi1_user_exp_rcv_clear(fd, tinfo); + tinfo->tidlist = 0; + pinned = 0; /* nothing left to unpin */ + pageset_count = 0; /* nothing left reserved */ +fail_unreserve: + spin_lock(&fd->tid_lock); + fd->tid_used -= pageset_count; + spin_unlock(&fd->tid_lock); +fail_unpin: + if (pinned > 0) + unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); +fail_release_mem: + kfree(tidbuf->pages); + kfree(tidbuf->psets); + kfree(tidbuf); + kfree(tidlist); + return ret; } int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, From 6ce4382bd1377dadba4c2b3ce23ca976e333b95f Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:26 -0500 Subject: [PATCH 019/143] IB/hfi1: Immediately remove invalid memory from hardware [ Upstream commit 1c7edde1b5720ddb0aff5ca8c7f605a0f92526eb ] When a user expected receive page is unmapped, it should be immediately removed from hardware rather than depend on a reaction from user space. Fixes: 2677a7680e77 ("IB/hfi1: Fix memory leak during unexpected shutdown") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328548663.1472310.7871808081861622659.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 43 +++++++++++++++-------- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 1 + 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index ba61d327a85d..b70d1b135ee4 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -70,8 +70,9 @@ static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, unsigned int start, u16 count, u32 *tidlist, unsigned int *tididx, unsigned int *pmapped); -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp); +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo); +static void __clear_tid_node(struct hfi1_filedata *fd, + struct tid_rb_node *node); static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static const struct mmu_interval_notifier_ops tid_mn_ops = { @@ -511,7 +512,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, mutex_lock(&uctxt->exp_mutex); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { - ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); + ret = unprogram_rcvarray(fd, tidinfo[tididx]); if (ret) { hfi1_cdbg(TID, "Failed to unprogram rcv array %d", ret); @@ -767,6 +768,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, } node->fdata = fd; + mutex_init(&node->invalidate_mutex); node->phys = page_to_phys(pages[0]); node->npages = npages; node->rcventry = rcventry; @@ -806,8 +808,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, return -EFAULT; } -static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, - struct tid_group **grp) +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; @@ -830,9 +831,6 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, if (!node || node->rcventry != (uctxt->expected_base + rcventry)) return -EBADF; - if (grp) - *grp = node->grp; - if (fd->use_mn) mmu_interval_notifier_remove(&node->notifier); cacheless_tid_rb_remove(fd, node); @@ -840,23 +838,34 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, return 0; } -static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) +static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) { struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; + mutex_lock(&node->invalidate_mutex); + if (node->freed) + goto done; + node->freed = true; + trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, node->npages, node->notifier.interval_tree.start, node->phys, node->dma_addr); - /* - * Make sure device has seen the write before we unpin the - * pages. - */ + /* Make sure device has seen the write before pages are unpinned */ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); +done: + mutex_unlock(&node->invalidate_mutex); +} + +static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + + __clear_tid_node(fd, node); node->grp->used--; node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); @@ -915,10 +924,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, if (node->freed) return true; + /* take action only if unmapping */ + if (range->event != MMU_NOTIFY_UNMAP) + return true; + trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->notifier.interval_tree.start, node->rcventry, node->npages, node->dma_addr); - node->freed = true; + + /* clear the hardware rcvarray entry */ + __clear_tid_node(fdata, node); spin_lock(&fdata->invalid_lock); if (fdata->invalid_tid_idx < uctxt->expected_count) { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index d45c7b6988d4..ba06ab3b4769 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -68,6 +68,7 @@ struct tid_user_buf { struct tid_rb_node { struct mmu_interval_notifier notifier; struct hfi1_filedata *fdata; + struct mutex invalidate_mutex; /* covers hw removal */ unsigned long phys; struct tid_group *grp; u32 rcventry; From f6fa12fbb17cdd46d6f981634dd502840df2d3ea Mon Sep 17 00:00:00 2001 From: Dean Luick Date: Mon, 9 Jan 2023 12:31:31 -0500 Subject: [PATCH 020/143] IB/hfi1: Remove user expected buffer invalidate race [ Upstream commit b3deec25847bda34e34d5d7be02f633caf000bd8 ] During setup, there is a possible race between a page invalidate and hardware programming. Add a covering invalidate over the user target range during setup. If anything within that range is invalidated during setup, fail the setup. Once set up, each TID will have its own invalidate callback and invalidate. Fixes: 3889551db212 ("RDMA/hfi1: Use mmu_interval_notifier_insert for user_exp_rcv") Signed-off-by: Dean Luick Signed-off-by: Dennis Dalessandro Link: https://lore.kernel.org/r/167328549178.1472310.9867497376936699488.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_exp_rcv.c | 58 +++++++++++++++++++++-- drivers/infiniband/hw/hfi1/user_exp_rcv.h | 2 + 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b70d1b135ee4..897923981855 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -65,6 +65,9 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, unsigned long cur_seq); +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *, struct tid_group *grp, unsigned int start, u16 count, @@ -78,6 +81,9 @@ static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node); static const struct mmu_interval_notifier_ops tid_mn_ops = { .invalidate = tid_rb_invalidate, }; +static const struct mmu_interval_notifier_ops tid_cover_ops = { + .invalidate = tid_cover_invalidate, +}; /* * Initialize context and file private data needed for Expected @@ -296,6 +302,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, tididx = 0, mapped, mapped_pages = 0; u32 *tidlist = NULL; struct tid_user_buf *tidbuf; + unsigned long mmu_seq = 0; if (!PAGE_ALIGNED(tinfo->vaddr)) return -EINVAL; @@ -306,6 +313,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, if (!tidbuf) return -ENOMEM; + mutex_init(&tidbuf->cover_mutex); tidbuf->vaddr = tinfo->vaddr; tidbuf->length = tinfo->length; tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets), @@ -315,6 +323,16 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, goto fail_release_mem; } + if (fd->use_mn) { + ret = mmu_interval_notifier_insert( + &tidbuf->notifier, current->mm, + tidbuf->vaddr, tidbuf->npages * PAGE_SIZE, + &tid_cover_ops); + if (ret) + goto fail_release_mem; + mmu_seq = mmu_interval_read_begin(&tidbuf->notifier); + } + pinned = pin_rcv_pages(fd, tidbuf); if (pinned <= 0) { ret = (pinned < 0) ? pinned : -ENOSPC; @@ -457,6 +475,20 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages, false); + if (fd->use_mn) { + /* check for an invalidate during setup */ + bool fail = false; + + mutex_lock(&tidbuf->cover_mutex); + fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq); + mutex_unlock(&tidbuf->cover_mutex); + + if (fail) { + ret = -EBUSY; + goto fail_unprogram; + } + } + tinfo->tidcnt = tididx; tinfo->length = mapped_pages * PAGE_SIZE; @@ -466,6 +498,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, goto fail_unprogram; } + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); kfree(tidbuf->pages); kfree(tidbuf->psets); kfree(tidbuf); @@ -484,6 +518,8 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, fd->tid_used -= pageset_count; spin_unlock(&fd->tid_lock); fail_unpin: + if (fd->use_mn) + mmu_interval_notifier_remove(&tidbuf->notifier); if (pinned > 0) unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false); fail_release_mem: @@ -784,11 +820,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd, &tid_mn_ops); if (ret) goto out_unmap; - /* - * FIXME: This is in the wrong order, the notifier should be - * established before the pages are pinned by pin_rcv_pages. - */ - mmu_interval_read_begin(&node->notifier); } fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; @@ -963,6 +994,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni, return true; } +static bool tid_cover_invalidate(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq) +{ + struct tid_user_buf *tidbuf = + container_of(mni, struct tid_user_buf, notifier); + + /* take action only if unmapping */ + if (range->event == MMU_NOTIFY_UNMAP) { + mutex_lock(&tidbuf->cover_mutex); + mmu_interval_set_seq(mni, cur_seq); + mutex_unlock(&tidbuf->cover_mutex); + } + + return true; +} + static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, struct tid_rb_node *tnode) { diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h index ba06ab3b4769..849f265f2f11 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h @@ -57,6 +57,8 @@ struct tid_pageset { }; struct tid_user_buf { + struct mmu_interval_notifier notifier; + struct mutex cover_mutex; unsigned long vaddr; unsigned long length; unsigned int npages; From 39ab0fc4984285851048716e7f85ef8b11cabf11 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 10 Jan 2023 13:49:30 +0100 Subject: [PATCH 021/143] affs: initialize fsdata in affs_truncate() [ Upstream commit eef034ac6690118c88f357b00e2b3239c9d8575d ] When aops->write_begin() does not initialize fsdata, KMSAN may report an error passing the latter to aops->write_end(). Fix this by unconditionally initializing fsdata. Fixes: f2b6a16eb8f5 ("fs: affs convert to new aops") Suggested-by: Eric Biggers Signed-off-by: Alexander Potapenko Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/affs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index d91b0133d95d..c3d89fa1bab7 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -879,7 +879,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; From b18490138dc69ed83c5cfcdad8789004e67fee8d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 17 Dec 2022 17:05:41 +0100 Subject: [PATCH 022/143] PM: AVS: qcom-cpr: Fix an error handling path in cpr_probe() [ Upstream commit 6049aae52392539e505bfb8ccbcff3c26f1d2f0b ] If an error occurs after a successful pm_genpd_init() call, it should be undone by a corresponding pm_genpd_remove(). Add the missing call in the error handling path, as already done in the remove function. Fixes: bf6910abf548 ("power: avs: Add support for CPR (Core Power Reduction)") Signed-off-by: Christophe JAILLET Reviewed-by: Ulf Hansson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/0f520597dbad89ab99c217c8986912fa53eaf5f9.1671293108.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/soc/qcom/cpr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c index 6298561bc29c..fac0414c3731 100644 --- a/drivers/soc/qcom/cpr.c +++ b/drivers/soc/qcom/cpr.c @@ -1743,12 +1743,16 @@ static int cpr_probe(struct platform_device *pdev) ret = of_genpd_add_provider_simple(dev->of_node, &drv->pd); if (ret) - return ret; + goto err_remove_genpd; platform_set_drvdata(pdev, drv); cpr_debugfs_init(drv); return 0; + +err_remove_genpd: + pm_genpd_remove(&drv->pd); + return ret; } static int cpr_remove(struct platform_device *pdev) From fa566549a152a99b6aca9be80b84d506eb2a91c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Jan 2023 22:25:29 -0800 Subject: [PATCH 023/143] phy: ti: fix Kconfig warning and operator precedence [ Upstream commit 7124c93887cc4e6c5b48920f83115e4a5892e870 ] Fix Kconfig depends operator precedence to prevent a Kconfig warning: WARNING: unmet direct dependencies detected for MUX_MMIO Depends on [n]: MULTIPLEXER [=m] && OF [=n] Selected by [m]: - PHY_AM654_SERDES [=m] && (OF [=n] && ARCH_K3 || COMPILE_TEST [=y]) && COMMON_CLK [=y] Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC") Fixes: 091876cc355d ("phy: ti: j721e-wiz: Add support for WIZ module present in TI J721E SoC") Signed-off-by: Randy Dunlap Cc: Vinod Koul Cc: Kishon Vijay Abraham I Cc: linux-phy@lists.infradead.org Cc: Arnd Bergmann Link: https://lore.kernel.org/r/20230110062529.22668-1-rdunlap@infradead.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/ti/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/Kconfig b/drivers/phy/ti/Kconfig index 15a3bcf32308..b905902d5750 100644 --- a/drivers/phy/ti/Kconfig +++ b/drivers/phy/ti/Kconfig @@ -23,7 +23,7 @@ config PHY_DM816X_USB config PHY_AM654_SERDES tristate "TI AM654 SERDES support" - depends on OF && ARCH_K3 || COMPILE_TEST + depends on OF && (ARCH_K3 || COMPILE_TEST) depends on COMMON_CLK select GENERIC_PHY select MULTIPLEXER @@ -35,7 +35,7 @@ config PHY_AM654_SERDES config PHY_J721E_WIZ tristate "TI J721E WIZ (SERDES Wrapper) support" - depends on OF && ARCH_K3 || COMPILE_TEST + depends on OF && (ARCH_K3 || COMPILE_TEST) depends on HAS_IOMEM && OF_ADDRESS depends on COMMON_CLK select GENERIC_PHY From aa8b584cec01e2a7245f27d53d36ad287df71436 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 8 Dec 2022 13:52:41 +0200 Subject: [PATCH 024/143] ARM: dts: at91: sam9x60: fix the ddr clock for sam9x60 [ Upstream commit 9bfa2544dbd1133f0b0af4e967de3bb9c1e3a497 ] The 2nd DDR clock for sam9x60 DDR controller is peripheral clock with id 49. Fixes: 1e5f532c2737 ("ARM: dts: at91: sam9x60: add device tree for soc and board") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20221208115241.36312-1-claudiu.beznea@microchip.com Signed-off-by: Sasha Levin --- arch/arm/boot/dts/sam9x60.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi index ec45ced3cde6..e1e0dec8cc1f 100644 --- a/arch/arm/boot/dts/sam9x60.dtsi +++ b/arch/arm/boot/dts/sam9x60.dtsi @@ -567,7 +567,7 @@ pmecc: ecc-engine@ffffe000 { mpddrc: mpddrc@ffffe800 { compatible = "microchip,sam9x60-ddramc", "atmel,sama5d3-ddramc"; reg = <0xffffe800 0x200>; - clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_CORE PMC_MCK>; + clocks = <&pmc PMC_TYPE_SYSTEM 2>, <&pmc PMC_TYPE_PERIPHERAL 49>; clock-names = "ddrck", "mpddr"; }; From a65a8727a2fe606036933222313c775964de6e92 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:51 +0530 Subject: [PATCH 025/143] amd-xgbe: TX Flow Ctrl Registers are h/w ver dependent [ Upstream commit 579923d84b04abb6cd4cd1fd9974096a2dd1832b ] There is difference in the TX Flow Control registers (TFCR) between the revisions of the hardware. The older revisions of hardware used to have single register per queue. Whereas, the newer revision of hardware (from ver 30H onwards) have one register per priority. Update the driver to use the TFCR based on the reported version of the hardware. Fixes: c5aa9e3b8156 ("amd-xgbe: Initial AMD 10GbE platform driver") Co-developed-by: Ajith Nayak Signed-off-by: Ajith Nayak Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index d5fd49dd25f3..decc1c09a031 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -524,19 +524,28 @@ static void xgbe_disable_vxlan(struct xgbe_prv_data *pdata) netif_dbg(pdata, drv, pdata->netdev, "VXLAN acceleration disabled\n"); } +static unsigned int xgbe_get_fc_queue_count(struct xgbe_prv_data *pdata) +{ + unsigned int max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; + + /* From MAC ver 30H the TFCR is per priority, instead of per queue */ + if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) >= 0x30) + return max_q_count; + else + return min_t(unsigned int, pdata->tx_q_count, max_q_count); +} + static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata) { - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Clear MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, 0); /* Clear MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); @@ -553,9 +562,8 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) { struct ieee_pfc *pfc = pdata->pfc; struct ieee_ets *ets = pdata->ets; - unsigned int max_q_count, q_count; unsigned int reg, reg_val; - unsigned int i; + unsigned int i, q_count; /* Set MTL flow control */ for (i = 0; i < pdata->rx_q_count; i++) { @@ -579,8 +587,7 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata) } /* Set MAC flow control */ - max_q_count = XGMAC_MAX_FLOW_CONTROL_QUEUES; - q_count = min_t(unsigned int, pdata->tx_q_count, max_q_count); + q_count = xgbe_get_fc_queue_count(pdata); reg = MAC_Q0TFCR; for (i = 0; i < q_count; i++) { reg_val = XGMAC_IOREAD(pdata, reg); From f351af45e203787212cb027f6e1e482ea8d95d92 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 11 Jan 2023 22:58:52 +0530 Subject: [PATCH 026/143] amd-xgbe: Delay AN timeout during KR training [ Upstream commit 926446ae24c03311a480fb96eb78f0ce7ea6d091 ] AN restart triggered during KR training not only aborts the KR training process but also move the HW to unstable state. Driver has to wait upto 500ms or until the KR training is completed before restarting AN cycle. Fixes: 7c12aa08779c ("amd-xgbe: Move the PHY support into amd-xgbe") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Signed-off-by: Raju Rangoju Acked-by: Shyam Sundar S K Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 24 +++++++++++++++++++++++ drivers/net/ethernet/amd/xgbe/xgbe.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 0c5c1b155683..43fdd111235a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -496,6 +496,7 @@ static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata, reg |= XGBE_KR_TRAINING_ENABLE; reg |= XGBE_KR_TRAINING_START; XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg); + pdata->kr_start_time = jiffies; netif_dbg(pdata, link, pdata->netdev, "KR training initiated\n"); @@ -632,6 +633,8 @@ static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata) xgbe_switch_mode(pdata); + pdata->an_result = XGBE_AN_READY; + xgbe_an_restart(pdata); return XGBE_AN_INCOMPAT_LINK; @@ -1275,9 +1278,30 @@ static bool xgbe_phy_aneg_done(struct xgbe_prv_data *pdata) static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata) { unsigned long link_timeout; + unsigned long kr_time; + int wait; link_timeout = pdata->link_check + (XGBE_LINK_TIMEOUT * HZ); if (time_after(jiffies, link_timeout)) { + if ((xgbe_cur_mode(pdata) == XGBE_MODE_KR) && + pdata->phy.autoneg == AUTONEG_ENABLE) { + /* AN restart should not happen while KR training is in progress. + * The while loop ensures no AN restart during KR training, + * waits up to 500ms and AN restart is triggered only if KR + * training is failed. + */ + wait = XGBE_KR_TRAINING_WAIT_ITER; + while (wait--) { + kr_time = pdata->kr_start_time + + msecs_to_jiffies(XGBE_AN_MS_TIMEOUT); + if (time_after(jiffies, kr_time)) + break; + /* AN restart is not required, if AN result is COMPLETE */ + if (pdata->an_result == XGBE_AN_COMPLETE) + return; + usleep_range(10000, 11000); + } + } netif_dbg(pdata, link, pdata->netdev, "AN link timeout\n"); xgbe_phy_config_aneg(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 3305979a9f7c..e0b8f3c4cc0b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -289,6 +289,7 @@ /* Auto-negotiation */ #define XGBE_AN_MS_TIMEOUT 500 #define XGBE_LINK_TIMEOUT 5 +#define XGBE_KR_TRAINING_WAIT_ITER 50 #define XGBE_SGMII_AN_LINK_STATUS BIT(1) #define XGBE_SGMII_AN_LINK_SPEED (BIT(2) | BIT(3)) @@ -1253,6 +1254,7 @@ struct xgbe_prv_data { unsigned int parallel_detect; unsigned int fec_ability; unsigned long an_start; + unsigned long kr_start_time; enum xgbe_an_mode an_mode; /* I2C support */ From da75dec7c6617bddad418159ffebcb133f008262 Mon Sep 17 00:00:00 2001 From: Luis Gerhorst Date: Mon, 9 Jan 2023 16:05:46 +0100 Subject: [PATCH 027/143] bpf: Fix pointer-leak due to insufficient speculative store bypass mitigation [ Upstream commit e4f4db47794c9f474b184ee1418f42e6a07412b6 ] To mitigate Spectre v4, 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") inserts lfence instructions after 1) initializing a stack slot and 2) spilling a pointer to the stack. However, this does not cover cases where a stack slot is first initialized with a pointer (subject to sanitization) but then overwritten with a scalar (not subject to sanitization because the slot was already initialized). In this case, the second write may be subject to speculative store bypass (SSB) creating a speculative pointer-as-scalar type confusion. This allows the program to subsequently leak the numerical pointer value using, for example, a branch-based cache side channel. To fix this, also sanitize scalars if they write a stack slot that previously contained a pointer. Assuming that pointer-spills are only generated by LLVM on register-pressure, the performance impact on most real-world BPF programs should be small. The following unprivileged BPF bytecode drafts a minimal exploit and the mitigation: [...] // r6 = 0 or 1 (skalar, unknown user input) // r7 = accessible ptr for side channel // r10 = frame pointer (fp), to be leaked // r9 = r10 # fp alias to encourage ssb *(u64 *)(r9 - 8) = r10 // fp[-8] = ptr, to be leaked // lfence added here because of pointer spill to stack. // // Ommitted: Dummy bpf_ringbuf_output() here to train alias predictor // for no r9-r10 dependency. // *(u64 *)(r10 - 8) = r6 // fp[-8] = scalar, overwrites ptr // 2039f26f3aca: no lfence added because stack slot was not STACK_INVALID, // store may be subject to SSB // // fix: also add an lfence when the slot contained a ptr // r8 = *(u64 *)(r9 - 8) // r8 = architecturally a scalar, speculatively a ptr // // leak ptr using branch-based cache side channel: r8 &= 1 // choose bit to leak if r8 == 0 goto SLOW // no mispredict // architecturally dead code if input r6 is 0, // only executes speculatively iff ptr bit is 1 r8 = *(u64 *)(r7 + 0) # encode bit in cache (0: slow, 1: fast) SLOW: [...] After running this, the program can time the access to *(r7 + 0) to determine whether the chosen pointer bit was 0 or 1. Repeat this 64 times to recover the whole address on amd64. In summary, sanitization can only be skipped if one scalar is overwritten with another scalar. Scalar-confusion due to speculative store bypass can not lead to invalid accesses because the pointer bounds deducted during verification are enforced using branchless logic. See 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") for details. Do not make the mitigation depend on !env->allow_{uninit_stack,ptr_leaks} because speculative leaks are likely unexpected if these were enabled. For example, leaking the address to a protected log file may be acceptable while disabling the mitigation might unintentionally leak the address into the cached-state of a map that is accessible to unprivileged processes. Fixes: 2039f26f3aca ("bpf: Fix leakage due to insufficient speculative store bypass mitigation") Signed-off-by: Luis Gerhorst Signed-off-by: Daniel Borkmann Acked-by: Henriette Hofmeier Link: https://lore.kernel.org/bpf/edc95bad-aada-9cfc-ffe2-fa9bb206583c@cs.fau.de Link: https://lore.kernel.org/bpf/20230109150544.41465-1-gerhorst@cs.fau.de Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 232c93357b90..a6c931fed39b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2319,7 +2319,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - if (state->stack[spi].slot_type[i] == STACK_INVALID) { + u8 type = state->stack[spi].slot_type[i]; + + if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; break; } From 2a0156a4aaea97a270a388f7f561c668aeeabc96 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Mon, 5 Dec 2022 19:58:23 +0800 Subject: [PATCH 028/143] phy: rockchip-inno-usb2: Fix missing clk_disable_unprepare() in rockchip_usb2phy_power_on() [ Upstream commit 5daba914da0e48950e9407ea4d75fa57029c9adc ] The clk_disable_unprepare() should be called in the error handling of rockchip_usb2phy_power_on(). Fixes: 0e08d2a727e6 ("phy: rockchip-inno-usb2: add a new driver for Rockchip usb2phy") Signed-off-by: Shang XiaoJing Link: https://lore.kernel.org/r/20221205115823.16957-1-shangxiaojing@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/rockchip/phy-rockchip-inno-usb2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index 46ebdb1460a3..cab6a94bf161 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -467,8 +467,10 @@ static int rockchip_usb2phy_power_on(struct phy *phy) return ret; ret = property_enable(base, &rport->port_cfg->phy_sus, false); - if (ret) + if (ret) { + clk_disable_unprepare(rphy->clk480m); return ret; + } /* waiting for the utmi_clk to become stable */ usleep_range(1500, 2000); From ad1baab3a5c03692d22ce446f38596a126377f6a Mon Sep 17 00:00:00 2001 From: Jisoo Jang Date: Wed, 11 Jan 2023 22:19:14 +0900 Subject: [PATCH 029/143] net: nfc: Fix use-after-free in local_cleanup() [ Upstream commit 4bb4db7f3187c6e3de6b229ffc87cdb30a2d22b6 ] Fix a use-after-free that occurs in kfree_skb() called from local_cleanup(). This could happen when killing nfc daemon (e.g. neard) after detaching an nfc device. When detaching an nfc device, local_cleanup() called from nfc_llcp_unregister_device() frees local->rx_pending and decreases local->ref by kref_put() in nfc_llcp_local_put(). In the terminating process, nfc daemon releases all sockets and it leads to decreasing local->ref. After the last release of local->ref, local_cleanup() called from local_release() frees local->rx_pending again, which leads to the bug. Setting local->rx_pending to NULL in local_cleanup() could prevent use-after-free when local_cleanup() is called twice. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in kfree_skb() Call Trace: dump_stack_lvl (lib/dump_stack.c:106) print_address_description.constprop.0.cold (mm/kasan/report.c:306) kasan_check_range (mm/kasan/generic.c:189) kfree_skb (net/core/skbuff.c:955) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_local_put.part.0 (net/nfc/llcp_core.c:172) nfc_llcp_local_put (net/nfc/llcp_core.c:181) llcp_sock_destruct (net/nfc/llcp_sock.c:959) __sk_destruct (net/core/sock.c:2133) sk_destruct (net/core/sock.c:2181) __sk_free (net/core/sock.c:2192) sk_free (net/core/sock.c:2203) llcp_sock_release (net/nfc/llcp_sock.c:646) __sock_release (net/socket.c:650) sock_close (net/socket.c:1365) __fput (fs/file_table.c:306) task_work_run (kernel/task_work.c:179) ptrace_notify (kernel/signal.c:2354) syscall_exit_to_user_mode_prepare (kernel/entry/common.c:278) syscall_exit_to_user_mode (kernel/entry/common.c:296) do_syscall_64 (arch/x86/entry/common.c:86) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:106) Allocated by task 4719: kasan_save_stack (mm/kasan/common.c:45) __kasan_slab_alloc (mm/kasan/common.c:325) slab_post_alloc_hook (mm/slab.h:766) kmem_cache_alloc_node (mm/slub.c:3497) __alloc_skb (net/core/skbuff.c:552) pn533_recv_response (drivers/nfc/pn533/usb.c:65) __usb_hcd_giveback_urb (drivers/usb/core/hcd.c:1671) usb_giveback_urb_bh (drivers/usb/core/hcd.c:1704) tasklet_action_common.isra.0 (kernel/softirq.c:797) __do_softirq (kernel/softirq.c:571) Freed by task 1901: kasan_save_stack (mm/kasan/common.c:45) kasan_set_track (mm/kasan/common.c:52) kasan_save_free_info (mm/kasan/genericdd.c:518) __kasan_slab_free (mm/kasan/common.c:236) kmem_cache_free (mm/slub.c:3809) kfree_skbmem (net/core/skbuff.c:874) kfree_skb (net/core/skbuff.c:931) local_cleanup (net/nfc/llcp_core.c:159) nfc_llcp_unregister_device (net/nfc/llcp_core.c:1617) nfc_unregister_device (net/nfc/core.c:1179) pn53x_unregister_nfc (drivers/nfc/pn533/pn533.c:2846) pn533_usb_disconnect (drivers/nfc/pn533/usb.c:579) usb_unbind_interface (drivers/usb/core/driver.c:458) device_release_driver_internal (drivers/base/dd.c:1279) bus_remove_device (drivers/base/bus.c:529) device_del (drivers/base/core.c:3665) usb_disable_device (drivers/usb/core/message.c:1420) usb_disconnect (drivers/usb/core.c:2261) hub_event (drivers/usb/core/hub.c:5833) process_one_work (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/workqueue.h:108 kernel/workqueue.c:2281) worker_thread (include/linux/list.h:282 kernel/workqueue.c:2423) kthread (kernel/kthread.c:319) ret_from_fork (arch/x86/entry/entry_64.S:301) Fixes: 3536da06db0b ("NFC: llcp: Clean local timers and works when removing a device") Signed-off-by: Jisoo Jang Link: https://lore.kernel.org/r/20230111131914.3338838-1-jisoo.jang@yonsei.ac.kr Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/nfc/llcp_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index cc997518f79d..edadebb3efd2 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -159,6 +159,7 @@ static void local_cleanup(struct nfc_llcp_local *local) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + local->rx_pending = NULL; del_timer_sync(&local->sdreq_timer); cancel_work_sync(&local->sdreq_timeout_work); nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); From 613020d0489e3f6058d4f5c7c82bc4396cc880ed Mon Sep 17 00:00:00 2001 From: Esina Ekaterina Date: Thu, 12 Jan 2023 10:47:03 +0300 Subject: [PATCH 030/143] net: wan: Add checks for NULL for utdm in undo_uhdlc_init and unmap_si_regs [ Upstream commit 488e0bf7f34af3d42d1d5e56f7a5a7beaff188a3 ] If uhdlc_priv_tsa != 1 then utdm is not initialized. And if ret != NULL then goto undo_uhdlc_init, where utdm is dereferenced. Same if dev == NULL. Found by Astra Linux on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 8d68100ab4ad ("soc/fsl/qe: fix err handling of ucc_of_parse_tdm") Signed-off-by: Esina Ekaterina Link: https://lore.kernel.org/r/20230112074703.13558-1-eesina@astralinux.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wan/fsl_ucc_hdlc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 7eac6a3e1cde..ae1ae65e7f90 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -1245,9 +1245,11 @@ static int ucc_hdlc_probe(struct platform_device *pdev) free_dev: free_netdev(dev); undo_uhdlc_init: - iounmap(utdm->siram); + if (utdm) + iounmap(utdm->siram); unmap_si_regs: - iounmap(utdm->si_regs); + if (utdm) + iounmap(utdm->si_regs); free_utdm: if (uhdlc_priv->tsa) kfree(utdm); From 1af8071bd08ec17bd9467f6a9c3f17053c05c98c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 16 Jan 2023 10:49:57 +0100 Subject: [PATCH 031/143] gpio: mxc: Always set GPIOs used as interrupt source to INPUT mode [ Upstream commit 8e88a0feebb241cab0253698b2f7358b6ebec802 ] Always configure GPIO pins which are used as interrupt source as INPUTs. In case the default pin configuration is OUTPUT, or the prior stage does configure the pins as OUTPUT, then Linux will not reconfigure the pin as INPUT and no interrupts are received. Always configure the interrupt source GPIO pin as input to fix the above case. Reviewed-by: Linus Walleij Fixes: 07bd1a6cc7cbb ("MXC arch: Add gpio support for the whole platform") Signed-off-by: Marek Vasut Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-mxc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index ba6ed2a413f5..0d5a9fee3c70 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -231,7 +231,7 @@ static int gpio_set_irq_type(struct irq_data *d, u32 type) writel(1 << gpio_idx, port->base + GPIO_ISR); - return 0; + return port->gc.direction_input(&port->gc, gpio_idx); } static void mxc_flip_edge(struct mxc_gpio_port *port, u32 gpio) From 802fd7623e9ed19ee809b503e93fccc1e3f37bd6 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Wed, 11 Jan 2023 18:50:31 +0100 Subject: [PATCH 032/143] wifi: rndis_wlan: Prevent buffer overflow in rndis_query_oid [ Upstream commit b870e73a56c4cccbec33224233eaf295839f228c ] Since resplen and respoffs are signed integers sufficiently large values of unsigned int len and offset members of RNDIS response will result in negative values of prior variables. This may be utilized to bypass implemented security checks to either extract memory contents by manipulating offset or overflow the data buffer via memcpy by manipulating both offset and len. Additionally assure that sum of resplen and respoffs does not overflow so buffer boundaries are kept. Fixes: 80f8c5b434f9 ("rndis_wlan: copy only useful data from rndis_command respond") Signed-off-by: Szymon Heidrich Reviewed-by: Alexander Duyck Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230111175031.7049-1-szymon.heidrich@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/rndis_wlan.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 75b5d545b49e..dc076d844868 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -694,8 +694,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) struct rndis_query *get; struct rndis_query_c *get_c; } u; - int ret, buflen; - int resplen, respoffs, copylen; + int ret; + size_t buflen, resplen, respoffs, copylen; buflen = *len + sizeof(*u.get); if (buflen < CONTROL_BUFFER_SIZE) @@ -730,22 +730,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) if (respoffs > buflen) { /* Device returned data offset outside buffer, error. */ - netdev_dbg(dev->net, "%s(%s): received invalid " - "data offset: %d > %d\n", __func__, - oid_to_string(oid), respoffs, buflen); + netdev_dbg(dev->net, + "%s(%s): received invalid data offset: %zu > %zu\n", + __func__, oid_to_string(oid), respoffs, buflen); ret = -EINVAL; goto exit_unlock; } - if ((resplen + respoffs) > buflen) { - /* Device would have returned more data if buffer would - * have been big enough. Copy just the bits that we got. - */ - copylen = buflen - respoffs; - } else { - copylen = resplen; - } + copylen = min(resplen, buflen - respoffs); if (copylen > *len) copylen = *len; From c60fe70078d6e515f424cb868d07e00411b27fbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jan 2023 16:48:49 +0000 Subject: [PATCH 033/143] net/sched: sch_taprio: fix possible use-after-free [ Upstream commit 3a415d59c1dbec9d772dbfab2d2520d98360caae ] syzbot reported a nasty crash [1] in net_tx_action() which made little sense until we got a repro. This repro installs a taprio qdisc, but providing an invalid TCA_RATE attribute. qdisc_create() has to destroy the just initialized taprio qdisc, and taprio_destroy() is called. However, the hrtimer used by taprio had already fired, therefore advance_sched() called __netif_schedule(). Then net_tx_action was trying to use a destroyed qdisc. We can not undo the __netif_schedule(), so we must wait until one cpu serviced the qdisc before we can proceed. Many thanks to Alexander Potapenko for his help. [1] BUG: KMSAN: uninit-value in queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline] BUG: KMSAN: uninit-value in do_raw_spin_trylock include/linux/spinlock.h:191 [inline] BUG: KMSAN: uninit-value in __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline] BUG: KMSAN: uninit-value in _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138 queued_spin_trylock include/asm-generic/qspinlock.h:94 [inline] do_raw_spin_trylock include/linux/spinlock.h:191 [inline] __raw_spin_trylock include/linux/spinlock_api_smp.h:89 [inline] _raw_spin_trylock+0x92/0xa0 kernel/locking/spinlock.c:138 spin_trylock include/linux/spinlock.h:359 [inline] qdisc_run_begin include/net/sch_generic.h:187 [inline] qdisc_run+0xee/0x540 include/net/pkt_sched.h:125 net_tx_action+0x77c/0x9a0 net/core/dev.c:5086 __do_softirq+0x1cc/0x7fb kernel/softirq.c:571 run_ksoftirqd+0x2c/0x50 kernel/softirq.c:934 smpboot_thread_fn+0x554/0x9f0 kernel/smpboot.c:164 kthread+0x31b/0x430 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 Uninit was created at: slab_post_alloc_hook mm/slab.h:732 [inline] slab_alloc_node mm/slub.c:3258 [inline] __kmalloc_node_track_caller+0x814/0x1250 mm/slub.c:4970 kmalloc_reserve net/core/skbuff.c:358 [inline] __alloc_skb+0x346/0xcf0 net/core/skbuff.c:430 alloc_skb include/linux/skbuff.h:1257 [inline] nlmsg_new include/net/netlink.h:953 [inline] netlink_ack+0x5f3/0x12b0 net/netlink/af_netlink.c:2436 netlink_rcv_skb+0x55d/0x6c0 net/netlink/af_netlink.c:2507 rtnetlink_rcv+0x30/0x40 net/core/rtnetlink.c:6108 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0xabc/0xe90 net/socket.c:2482 ___sys_sendmsg+0x2a1/0x3f0 net/socket.c:2536 __sys_sendmsg net/socket.c:2565 [inline] __do_sys_sendmsg net/socket.c:2574 [inline] __se_sys_sendmsg net/socket.c:2572 [inline] __x64_sys_sendmsg+0x367/0x540 net/socket.c:2572 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd CPU: 0 PID: 13 Comm: ksoftirqd/0 Not tainted 6.0.0-rc2-syzkaller-47461-gac3859c02d7f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Alexander Potapenko Cc: Vinicius Costa Gomes Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/sch_generic.h | 7 +++++++ net/sched/sch_taprio.c | 3 +++ 2 files changed, 10 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7e8c318925d..61cd19ee51f4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1325,4 +1325,11 @@ static inline int skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res) return res->ingress ? netif_receive_skb(skb) : dev_queue_xmit(skb); } +/* Make sure qdisc is no longer in SCHED state. */ +static inline void qdisc_synchronize(const struct Qdisc *q) +{ + while (test_bit(__QDISC_STATE_SCHED, &q->state)) + msleep(1); +} + #endif diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 7f33b31c7b8b..5411bb4cdfc8 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1621,6 +1621,8 @@ static void taprio_reset(struct Qdisc *sch) int i; hrtimer_cancel(&q->advance_timer); + qdisc_synchronize(sch); + if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues; i++) if (q->qdiscs[i]) @@ -1642,6 +1644,7 @@ static void taprio_destroy(struct Qdisc *sch) * happens in qdisc_create(), after taprio_init() has been called. */ hrtimer_cancel(&q->advance_timer); + qdisc_synchronize(sch); taprio_disable_offload(dev, q, NULL); From e34a965f771f1977f172593c73e373036c765724 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 14 Nov 2022 20:16:19 +0100 Subject: [PATCH 034/143] l2tp: Serialize access to sk_user_data with sk_callback_lock [ Upstream commit b68777d54fac21fc833ec26ea1a2a84f975ab035 ] sk->sk_user_data has multiple users, which are not compatible with each other. Writers must synchronize by grabbing the sk->sk_callback_lock. l2tp currently fails to grab the lock when modifying the underlying tunnel socket fields. Fix it by adding appropriate locking. We err on the side of safety and grab the sk_callback_lock also inside the sk_destruct callback overridden by l2tp, even though there should be no refs allowing access to the sock at the time when sk_destruct gets called. v4: - serialize write to sk_user_data in l2tp sk_destruct v3: - switch from sock lock to sk_callback_lock - document write-protection for sk_user_data v2: - update Fixes to point to origin of the bug - use real names in Reported/Tested-by tags Cc: Tom Parkin Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Haowei Yan Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller Stable-dep-of: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Signed-off-by: Sasha Levin --- include/net/sock.h | 2 +- net/l2tp/l2tp_core.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 90a8b8b26a20..69bbbe8bbf34 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -315,7 +315,7 @@ struct bpf_local_storage; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dc8987ed08ad..e89852bc5309 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1471,16 +1473,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock(&sk->sk_callback_lock); + + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1506,7 +1510,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } else { - sk->sk_user_data = tunnel; + rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1520,6 +1524,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); + write_unlock(&sk->sk_callback_lock); return 0; err_sock: @@ -1527,6 +1532,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock_release(sock); else sockfd_put(sock); + + write_unlock(&sk->sk_callback_lock); err: return ret; } From 5b209b8c99d487a1c32983981bf3552980fda591 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 21 Nov 2022 09:54:26 +0100 Subject: [PATCH 035/143] l2tp: Don't sleep and disable BH under writer-side sk_callback_lock [ Upstream commit af295e854a4e3813ffbdef26dbb6a4d6226c3ea1 ] When holding a reader-writer spin lock we cannot sleep. Calling setup_udp_tunnel_sock() with write lock held violates this rule, because we end up calling percpu_down_read(), which might sleep, as syzbot reports [1]: __might_resched.cold+0x222/0x26b kernel/sched/core.c:9890 percpu_down_read include/linux/percpu-rwsem.h:49 [inline] cpus_read_lock+0x1b/0x140 kernel/cpu.c:310 static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158 udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline] setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81 l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509 pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723 Trim the writer-side critical section for sk_callback_lock down to the minimum, so that it covers only operations on sk_user_data. Also, when grabbing the sk_callback_lock, we always need to disable BH, as Eric points out. Failing to do so leads to deadlocks because we acquire sk_callback_lock in softirq context, which can get stuck waiting on us if: 1) it runs on the same CPU, or CPU0 ---- lock(clock-AF_INET6); lock(clock-AF_INET6); 2) lock ordering leads to priority inversion CPU0 CPU1 ---- ---- lock(clock-AF_INET6); local_irq_disable(); lock(&tcp_hashinfo.bhash[i].lock); lock(clock-AF_INET6); lock(&tcp_hashinfo.bhash[i].lock); ... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock. [1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/ [2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/ [3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/ v2: - Check and set sk_user_data while holding sk_callback_lock for both L2TP encapsulation types (IP and UDP) (Tetsuo) Cc: Tom Parkin Cc: Tetsuo Handa Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock") Reported-by: Eric Dumazet Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Signed-off-by: David S. Miller Stable-dep-of: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e89852bc5309..d6bb1795329a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1476,11 +1476,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1509,8 +1510,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1524,16 +1523,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } From 76c640d6a1e8a98afbc6ba3f884afe6e0bc5271d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Jan 2023 19:01:36 -0800 Subject: [PATCH 036/143] l2tp: convert l2tp_tunnel_list to idr [ Upstream commit c4d48a58f32c5972174a1d01c33b296fe378cce0 ] l2tp uses l2tp_tunnel_list to track all registered tunnels and to allocate tunnel ID's. IDR can do the same job. More importantly, with IDR we can hold the ID before a successful registration so that we don't need to worry about late error handling, it is not easy to rollback socket changes. This is a preparation for the following fix. Cc: Tetsuo Handa Cc: Guillaume Nault Cc: Jakub Sitnicki Cc: Eric Dumazet Cc: Tom Parkin Signed-off-by: Cong Wang Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller Stable-dep-of: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 85 ++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 43 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index d6bb1795329a..1bd52b8bb29f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; struct l2tp_net { - struct list_head l2tp_tunnel_list; - /* Lock for write access to l2tp_tunnel_list */ - spinlock_t l2tp_tunnel_list_lock; + /* Lock for write access to l2tp_tunnel_idr */ + spinlock_t l2tp_tunnel_idr_lock; + struct idr l2tp_tunnel_idr; struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2]; /* Lock for write access to l2tp_session_hlist */ spinlock_t l2tp_session_hlist_lock; @@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) struct l2tp_tunnel *tunnel; rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (tunnel->tunnel_id == tunnel_id && - refcount_inc_not_zero(&tunnel->ref_count)) { - rcu_read_unlock_bh(); - - return tunnel; - } + tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id); + if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) { + rcu_read_unlock_bh(); + return tunnel; } rcu_read_unlock_bh(); @@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get); struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth) { - const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_net *pn = l2tp_pernet(net); + unsigned long tunnel_id, tmp; struct l2tp_tunnel *tunnel; int count = 0; rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - if (++count > nth && + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel && ++count > nth && refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; @@ -1229,6 +1227,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk) l2tp_tunnel_delete(tunnel); } +static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel) +{ + struct l2tp_net *pn = l2tp_pernet(net); + + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); +} + /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { @@ -1236,7 +1243,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work) del_work); struct sock *sk = tunnel->sock; struct socket *sock = sk->sk_socket; - struct l2tp_net *pn; l2tp_tunnel_closeall(tunnel); @@ -1250,12 +1256,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } } - /* Remove the tunnel struct from the tunnel list */ - pn = l2tp_pernet(tunnel->l2tp_net); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_del_rcu(&tunnel->list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - + l2tp_tunnel_remove(tunnel->l2tp_net, tunnel); /* drop initial ref */ l2tp_tunnel_dec_refcount(tunnel); @@ -1457,12 +1458,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net, int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg) { - struct l2tp_tunnel *tunnel_walk; - struct l2tp_net *pn; + struct l2tp_net *pn = l2tp_pernet(net); + u32 tunnel_id = tunnel->tunnel_id; struct socket *sock; struct sock *sk; int ret; + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id, + GFP_ATOMIC); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); + if (ret) + return ret == -ENOSPC ? -EEXIST : ret; + if (tunnel->fd < 0) { ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id, tunnel->peer_tunnel_id, cfg, @@ -1483,23 +1491,13 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, rcu_assign_sk_user_data(sk, tunnel); write_unlock_bh(&sk->sk_callback_lock); - tunnel->l2tp_net = net; - pn = l2tp_pernet(net); - sock_hold(sk); tunnel->sock = sk; + tunnel->l2tp_net = net; - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) { - if (tunnel_walk->tunnel_id == tunnel->tunnel_id) { - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - sock_put(sk); - ret = -EEXIST; - goto err_sock; - } - } - list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { @@ -1525,9 +1523,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, return 0; -err_sock: - write_lock_bh(&sk->sk_callback_lock); - rcu_assign_sk_user_data(sk, NULL); err_inval_sock: write_unlock_bh(&sk->sk_callback_lock); @@ -1536,6 +1531,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, else sockfd_put(sock); err: + l2tp_tunnel_remove(net, tunnel); return ret; } EXPORT_SYMBOL_GPL(l2tp_tunnel_register); @@ -1649,8 +1645,8 @@ static __net_init int l2tp_init_net(struct net *net) struct l2tp_net *pn = net_generic(net, l2tp_net_id); int hash; - INIT_LIST_HEAD(&pn->l2tp_tunnel_list); - spin_lock_init(&pn->l2tp_tunnel_list_lock); + idr_init(&pn->l2tp_tunnel_idr); + spin_lock_init(&pn->l2tp_tunnel_idr_lock); for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]); @@ -1664,11 +1660,13 @@ static __net_exit void l2tp_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; + unsigned long tunnel_id, tmp; int hash; rcu_read_lock_bh(); - list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - l2tp_tunnel_delete(tunnel); + idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { + if (tunnel) + l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); @@ -1678,6 +1676,7 @@ static __net_exit void l2tp_exit_net(struct net *net) for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash])); + idr_destroy(&pn->l2tp_tunnel_idr); } static struct pernet_operations l2tp_net_ops = { From 2d77e5c0ad79004b5ef901895437e9cce6dfcc7e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Jan 2023 19:01:37 -0800 Subject: [PATCH 037/143] l2tp: close all race conditions in l2tp_tunnel_register() [ Upstream commit 0b2c59720e65885a394a017d0cf9cab118914682 ] The code in l2tp_tunnel_register() is racy in several ways: 1. It modifies the tunnel socket _after_ publishing it. 2. It calls setup_udp_tunnel_sock() on an existing socket without locking. 3. It changes sock lock class on fly, which triggers many syzbot reports. This patch amends all of them by moving socket initialization code before publishing and under sock lock. As suggested by Jakub, the l2tp lockdep class is not necessary as we can just switch to bh_lock_sock_nested(). Fixes: 37159ef2c1ae ("l2tp: fix a lockdep splat") Fixes: 6b9f34239b00 ("l2tp: fix races in tunnel creation") Reported-by: syzbot+52866e24647f9a23403f@syzkaller.appspotmail.com Reported-by: syzbot+94cc2a66fc228b23f360@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Cc: Guillaume Nault Cc: Jakub Sitnicki Cc: Eric Dumazet Cc: Tom Parkin Signed-off-by: Cong Wang Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1bd52b8bb29f..386510a93696 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1041,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock(sk); + bh_lock_sock_nested(sk); if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1387,8 +1387,6 @@ static int l2tp_tunnel_sock_create(struct net *net, return err; } -static struct lock_class_key l2tp_socket_class; - int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { @@ -1484,21 +1482,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; + lock_sock(sk); write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); - if (ret < 0) + if (ret < 0) { + release_sock(sk); goto err_inval_sock; + } rcu_assign_sk_user_data(sk, tunnel); write_unlock_bh(&sk->sk_callback_lock); - sock_hold(sk); - tunnel->sock = sk; - tunnel->l2tp_net = net; - - spin_lock_bh(&pn->l2tp_tunnel_idr_lock); - idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); - spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); - if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { .sk_user_data = tunnel, @@ -1512,9 +1505,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, tunnel->old_sk_destruct = sk->sk_destruct; sk->sk_destruct = &l2tp_tunnel_destruct; - lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, - "l2tp_sock"); sk->sk_allocation = GFP_ATOMIC; + release_sock(sk); + + sock_hold(sk); + tunnel->sock = sk; + tunnel->l2tp_net = net; + + spin_lock_bh(&pn->l2tp_tunnel_idr_lock); + idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); + spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); trace_register_tunnel(tunnel); From 67866b1e0ab9ca7858547c38f8f69d45892e67a8 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sat, 14 Jan 2023 19:23:26 +0100 Subject: [PATCH 038/143] net: usb: sr9700: Handle negative len [ Upstream commit ecf7cf8efb59789e2b21d2f9ab926142579092b2 ] Packet len computed as difference of length word extracted from skb data and four may result in a negative value. In such case processing of the buffer should be interrupted rather than setting sr_skb->len to an unexpectedly large value (due to cast from signed to unsigned integer) and passing sr_skb to usbnet_skb_return. Fixes: e9da0b56fe27 ("sr9700: sanity check for packet length") Signed-off-by: Szymon Heidrich Link: https://lore.kernel.org/r/20230114182326.30479-1-szymon.heidrich@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/usb/sr9700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index fce6713e970b..811c8751308c 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN || len > skb->len) + if (len > ETH_FRAME_LEN || len > skb->len || len < 0) return 0; /* the last packet of current skb */ From 4bc5f1f6bc94e695dfd912122af96e7115a0ddb8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Jan 2023 11:54:06 +0100 Subject: [PATCH 039/143] net: mdio: validate parameter addr in mdiobus_get_phy() [ Upstream commit 867dbe784c5010a466f00a7d1467c1c5ea569c75 ] The caller may pass any value as addr, what may result in an out-of-bounds access to array mdio_map. One existing case is stmmac_init_phy() that may pass -1 as addr. Therefore validate addr before using it. Fixes: 7f854420fbfe ("phy: Add API for {un}registering an mdio device to a bus.") Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/cdf664ea-3312-e915-73f8-021678d08887@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/mdio_bus.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 77ba6c3c7a09..e9303be48655 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -108,7 +108,12 @@ EXPORT_SYMBOL(mdiobus_unregister_device); struct phy_device *mdiobus_get_phy(struct mii_bus *bus, int addr) { - struct mdio_device *mdiodev = bus->mdio_map[addr]; + struct mdio_device *mdiodev; + + if (addr < 0 || addr >= ARRAY_SIZE(bus->mdio_map)) + return NULL; + + mdiodev = bus->mdio_map[addr]; if (!mdiodev) return NULL; From 5dc3469a1170dd1344d262a332b26994214eeb58 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Mon, 16 Jan 2023 11:11:24 +0000 Subject: [PATCH 040/143] HID: check empty report_list in hid_validate_values() [ Upstream commit b12fece4c64857e5fab4290bf01b2e0317a88456 ] Add a check for empty report_list in hid_validate_values(). The missing check causes a type confusion when issuing a list_entry() on an empty report_list. The problem is caused by the assumption that the device must have valid report_list. While this will be true for all normal HID devices, a suitably malicious device can violate the assumption. Fixes: 1b15d2e5b807 ("HID: core: fix validation of report id 0") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index eaaf732f0630..baadead947c8 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -988,8 +988,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, * Validating on id 0 means we should examine the first * report in the list. */ - report = list_entry( - hid->report_enum[type].report_list.next, + report = list_first_entry_or_null( + &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; From 20fd4598762e2d717deb64ef028e6f5f587ac2a6 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Mon, 16 Jan 2023 11:11:25 +0000 Subject: [PATCH 041/143] HID: check empty report_list in bigben_probe() [ Upstream commit c7bf714f875531f227f2ef1fdcc8f4d44e7c7d9d ] Add a check for empty report_list in bigben_probe(). The missing check causes a type confusion when issuing a list_entry() on an empty report_list. The problem is caused by the assumption that the device must have valid report_list. While this will be true for all normal HID devices, a suitably malicious device can violate the assumption. Fixes: 256a90ed9e46 ("HID: hid-bigbenff: driver for BigBen Interactive PS3OFMINIPAD gamepad") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-bigbenff.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index e8c5e3ac9fff..e8b16665860d 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -344,6 +344,11 @@ static int bigben_probe(struct hid_device *hid, } report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; + if (list_empty(report_list)) { + hid_err(hid, "no output report found\n"); + error = -ENODEV; + goto error_hw_stop; + } bigben->report = list_entry(report_list->next, struct hid_report, list); From 34f11949938b90f29886579fa3450a0df1ba3a5b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 15 Jan 2023 18:24:08 +0100 Subject: [PATCH 042/143] net: stmmac: fix invalid call to mdiobus_get_phy() [ Upstream commit 1f3bd64ad921f051254591fbed04fd30b306cde6 ] In a number of cases the driver assigns a default value of -1 to priv->plat->phy_addr. This may result in calling mdiobus_get_phy() with addr parameter being -1. Therefore check for this scenario and bail out before calling mdiobus_get_phy(). Fixes: 42e87024f727 ("net: stmmac: Fix case when PHY handle is not present") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/669f9671-ecd1-a41b-2727-7b73e3003985@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 14ea0168b548..b52ca2fe04d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1125,6 +1125,11 @@ static int stmmac_init_phy(struct net_device *dev) int addr = priv->plat->phy_addr; struct phy_device *phydev; + if (addr < 0) { + netdev_err(priv->dev, "no phy found\n"); + return -ENODEV; + } + phydev = mdiobus_get_phy(priv->mii, addr); if (!phydev) { netdev_err(priv->dev, "no phy at addr %d\n", addr); From 55be77aa8974ada9c47b7d966592c4d4119785cc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 17 Jan 2023 15:41:40 +0100 Subject: [PATCH 043/143] HID: revert CHERRY_MOUSE_000C quirk [ Upstream commit cbf44580ce6b310272a73e3e794233fd064330bd ] This partially reverts commit f6d910a89a2391 ("HID: usbhid: Add ALWAYS_POLL quirk for some mice"), as it turns out to break reboot on some platforms for reason yet to be understood. Fixes: f6d910a89a2391 ("HID: usbhid: Add ALWAYS_POLL quirk for some mice") Reported-by: Christian Zigotzky Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-ids.h | 1 - drivers/hid/hid-quirks.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 09c3f30f10d3..1d1306a6004e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -257,7 +257,6 @@ #define USB_DEVICE_ID_CH_AXIS_295 0x001c #define USB_VENDOR_ID_CHERRY 0x046a -#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 #define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 1efde40e5136..9f1fcbea19eb 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -54,7 +54,6 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, - { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, From 6dd9ea05534f323668db94fcc2726c7a84547e78 Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:05 +0530 Subject: [PATCH 044/143] usb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait [ Upstream commit 6a19da111057f69214b97c62fb0ac59023970850 ] While performing fast composition switch, there is a possibility that the process of ffs_ep0_write/ffs_ep0_read get into a race condition due to ep0req being freed up from functionfs_unbind. Consider the scenario that the ffs_ep0_write calls the ffs_ep0_queue_wait by taking a lock &ffs->ev.waitq.lock. However, the functionfs_unbind isn't bounded so it can go ahead and mark the ep0req to NULL, and since there is no NULL check in ffs_ep0_queue_wait we will end up in use-after-free. Fix this by making a serialized execution between the two functions using a mutex_lock(ffs->mutex). Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-2-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index bb0d92837f67..38942c6d3019 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -278,6 +278,9 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) struct usb_request *req = ffs->ep0req; int ret; + if (!req) + return -EINVAL; + req->zero = len < le16_to_cpu(ffs->ev.setup.wLength); spin_unlock_irq(&ffs->ev.waitq.lock); @@ -1881,10 +1884,12 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; ffs->gadget = NULL; clear_bit(FFS_FL_BOUND, &ffs->flags); + mutex_unlock(&ffs->mutex); ffs_data_put(ffs); } } From 4b3b5cc1a7dc28992dd728a1ebb2bb9e63a3f6fb Mon Sep 17 00:00:00 2001 From: Udipto Goswami Date: Thu, 15 Dec 2022 10:59:06 +0530 Subject: [PATCH 045/143] usb: gadget: f_fs: Ensure ep0req is dequeued before free_request [ Upstream commit ce405d561b020e5a46340eb5146805a625dcacee ] As per the documentation, function usb_ep_free_request guarantees the request will not be queued or no longer be re-queued (or otherwise used). However, with the current implementation it doesn't make sure that the request in ep0 isn't reused. Fix this by dequeuing the ep0req on functionfs_unbind before freeing the request to align with the definition. Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Udipto Goswami Tested-by: Krishna Kurapati Link: https://lore.kernel.org/r/20221215052906.8993-3-quic_ugoswami@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/f_fs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 38942c6d3019..94000fd190e5 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1884,6 +1884,8 @@ static void functionfs_unbind(struct ffs_data *ffs) ENTER(); if (!WARN_ON(!ffs->gadget)) { + /* dequeue before freeing ep0req */ + usb_ep_dequeue(ffs->gadget->ep0, ffs->ep0req); mutex_lock(&ffs->mutex); usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req); ffs->ep0req = NULL; From 31f63c62a8e81911c65d83837c75ca81e508aa42 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 30 Aug 2022 20:12:29 -0700 Subject: [PATCH 046/143] net: mlx5: eliminate anonymous module_init & module_exit [ Upstream commit 2c1e1b949024989e20907b84e11a731a50778416 ] Eliminate anonymous module_init() and module_exit(), which can lead to confusion or ambiguity when reading System.map, crashes/oops/bugs, or an initcall_debug log. Give each of these init and exit functions unique driver-specific names to eliminate the anonymous names. Example 1: (System.map) ffffffff832fc78c t init ffffffff832fc79e t init ffffffff832fc8f8 t init Example 2: (initcall_debug log) calling init+0x0/0x12 @ 1 initcall init+0x0/0x12 returned 0 after 15 usecs calling init+0x0/0x60 @ 1 initcall init+0x0/0x60 returned 0 after 2 usecs calling init+0x0/0x9a @ 1 initcall init+0x0/0x9a returned 0 after 74 usecs Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Randy Dunlap Cc: Eli Cohen Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Reviewed-by: Ira Weiny Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 29bc1df28aeb..112eaef186e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1642,7 +1642,7 @@ static void mlx5_core_verify_params(void) } } -static int __init init(void) +static int __init mlx5_init(void) { int err; @@ -1667,7 +1667,7 @@ static int __init init(void) return err; } -static void __exit cleanup(void) +static void __exit mlx5_cleanup(void) { #ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); @@ -1676,5 +1676,5 @@ static void __exit cleanup(void) mlx5_unregister_debugfs(); } -module_init(init); -module_exit(cleanup); +module_init(mlx5_init); +module_exit(mlx5_cleanup); From 1e97e2e08e79a55488876d7d538d0ddb7d0a6552 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2023 17:44:43 +0100 Subject: [PATCH 047/143] drm/panfrost: fix GENERIC_ATOMIC64 dependency [ Upstream commit 6437a549ae178a3f5a5c03e983f291ebcdc2bbc7 ] On ARMv5 and earlier, a randconfig build can still run into WARNING: unmet direct dependencies detected for IOMMU_IO_PGTABLE_LPAE Depends on [n]: IOMMU_SUPPORT [=y] && (ARM [=y] || ARM64 || COMPILE_TEST [=y]) && !GENERIC_ATOMIC64 [=y] Selected by [y]: - DRM_PANFROST [=y] && HAS_IOMEM [=y] && DRM [=y] && (ARM [=y] || ARM64 || COMPILE_TEST [=y] && !GENERIC_ATOMIC64 [=y]) && MMU [=y] Rework the dependencies to always require a working cmpxchg64. Fixes: db594ba3fcf9 ("drm/panfrost: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST") Signed-off-by: Arnd Bergmann Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20230117164456.1591901-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panfrost/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig index 86cdc0ce79e6..77f4d32e5204 100644 --- a/drivers/gpu/drm/panfrost/Kconfig +++ b/drivers/gpu/drm/panfrost/Kconfig @@ -3,7 +3,8 @@ config DRM_PANFROST tristate "Panfrost (DRM support for ARM Mali Midgard/Bifrost GPUs)" depends on DRM - depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE depends on MMU select DRM_SCHED select IOMMU_SUPPORT From 42ecd72f02cd657b00b559621e7ef7d2c4d3e5f1 Mon Sep 17 00:00:00 2001 From: Koba Ko Date: Thu, 1 Dec 2022 11:00:50 +0800 Subject: [PATCH 048/143] dmaengine: Fix double increment of client_count in dma_chan_get() [ Upstream commit f3dc1b3b4750851a94212dba249703dd0e50bb20 ] The first time dma_chan_get() is called for a channel the channel client_count is incorrectly incremented twice for public channels, first in balance_ref_count(), and again prior to returning. This results in an incorrect client count which will lead to the channel resources not being freed when they should be. A simple test of repeated module load and unload of async_tx on a Dell Power Edge R7425 also shows this resulting in a kref underflow warning. [ 124.329662] async_tx: api initialized (async) [ 129.000627] async_tx: api initialized (async) [ 130.047839] ------------[ cut here ]------------ [ 130.052472] refcount_t: underflow; use-after-free. [ 130.057279] WARNING: CPU: 3 PID: 19364 at lib/refcount.c:28 refcount_warn_saturate+0xba/0x110 [ 130.065811] Modules linked in: async_tx(-) rfkill intel_rapl_msr intel_rapl_common amd64_edac edac_mce_amd ipmi_ssif kvm_amd dcdbas kvm mgag200 drm_shmem_helper acpi_ipmi irqbypass drm_kms_helper ipmi_si syscopyarea sysfillrect rapl pcspkr ipmi_devintf sysimgblt fb_sys_fops k10temp i2c_piix4 ipmi_msghandler acpi_power_meter acpi_cpufreq vfat fat drm fuse xfs libcrc32c sd_mod t10_pi sg ahci crct10dif_pclmul libahci crc32_pclmul crc32c_intel ghash_clmulni_intel igb megaraid_sas i40e libata i2c_algo_bit ccp sp5100_tco dca dm_mirror dm_region_hash dm_log dm_mod [last unloaded: async_tx] [ 130.117361] CPU: 3 PID: 19364 Comm: modprobe Kdump: loaded Not tainted 5.14.0-185.el9.x86_64 #1 [ 130.126091] Hardware name: Dell Inc. PowerEdge R7425/02MJ3T, BIOS 1.18.0 01/17/2022 [ 130.133806] RIP: 0010:refcount_warn_saturate+0xba/0x110 [ 130.139041] Code: 01 01 e8 6d bd 55 00 0f 0b e9 72 9d 8a 00 80 3d 26 18 9c 01 00 75 85 48 c7 c7 f8 a3 03 9d c6 05 16 18 9c 01 01 e8 4a bd 55 00 <0f> 0b e9 4f 9d 8a 00 80 3d 01 18 9c 01 00 0f 85 5e ff ff ff 48 c7 [ 130.157807] RSP: 0018:ffffbf98898afe68 EFLAGS: 00010286 [ 130.163036] RAX: 0000000000000000 RBX: ffff9da06028e598 RCX: 0000000000000000 [ 130.170172] RDX: ffff9daf9de26480 RSI: ffff9daf9de198a0 RDI: ffff9daf9de198a0 [ 130.177316] RBP: ffff9da7cddf3970 R08: 0000000000000000 R09: 00000000ffff7fff [ 130.184459] R10: ffffbf98898afd00 R11: ffffffff9d9e8c28 R12: ffff9da7cddf1970 [ 130.191596] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 130.198739] FS: 00007f646435c740(0000) GS:ffff9daf9de00000(0000) knlGS:0000000000000000 [ 130.206832] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 130.212586] CR2: 00007f6463b214f0 CR3: 00000008ab98c000 CR4: 00000000003506e0 [ 130.219729] Call Trace: [ 130.222192] [ 130.224305] dma_chan_put+0x10d/0x110 [ 130.227988] dmaengine_put+0x7a/0xa0 [ 130.231575] __do_sys_delete_module.constprop.0+0x178/0x280 [ 130.237157] ? syscall_trace_enter.constprop.0+0x145/0x1d0 [ 130.242652] do_syscall_64+0x5c/0x90 [ 130.246240] ? exc_page_fault+0x62/0x150 [ 130.250178] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 130.255243] RIP: 0033:0x7f6463a3f5ab [ 130.258830] Code: 73 01 c3 48 8b 0d 75 a8 1b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 b0 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 45 a8 1b 00 f7 d8 64 89 01 48 [ 130.277591] RSP: 002b:00007fff22f972c8 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0 [ 130.285164] RAX: ffffffffffffffda RBX: 000055b6786edd40 RCX: 00007f6463a3f5ab [ 130.292303] RDX: 0000000000000000 RSI: 0000000000000800 RDI: 000055b6786edda8 [ 130.299443] RBP: 000055b6786edd40 R08: 0000000000000000 R09: 0000000000000000 [ 130.306584] R10: 00007f6463b9eac0 R11: 0000000000000206 R12: 000055b6786edda8 [ 130.313731] R13: 0000000000000000 R14: 000055b6786edda8 R15: 00007fff22f995f8 [ 130.320875] [ 130.323081] ---[ end trace eff7156d56b5cf25 ]--- cat /sys/class/dma/dma0chan*/in_use would get the wrong result. 2 2 2 Fixes: d2f4f99db3e9 ("dmaengine: Rework dma_chan_get") Signed-off-by: Koba Ko Reviewed-by: Jie Hai Test-by: Jie Hai Reviewed-by: Jerry Snitselaar Reviewed-by: Dave Jiang Tested-by: Joel Savitz Link: https://lore.kernel.org/r/20221201030050.978595-1-koba.ko@canonical.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/dmaengine.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index af3ee288bc11..4ec7bb58c195 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -451,7 +451,8 @@ static int dma_chan_get(struct dma_chan *chan) /* The channel is already in use, update client count */ if (chan->client_count) { __module_get(owner); - goto out; + chan->client_count++; + return 0; } if (!try_module_get(owner)) @@ -470,11 +471,11 @@ static int dma_chan_get(struct dma_chan *chan) goto err_out; } + chan->client_count++; + if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask)) balance_ref_count(chan); -out: - chan->client_count++; return 0; err_out: From f96b2f690887332e02a8994d8bcb6f98b6750539 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Mon, 16 Jan 2023 15:41:33 -0600 Subject: [PATCH 049/143] net: macb: fix PTP TX timestamp failure due to packet padding [ Upstream commit 7b90f5a665acd46efbbfa677a3a3a18d01ad6487 ] PTP TX timestamp handling was observed to be broken with this driver when using the raw Layer 2 PTP encapsulation. ptp4l was not receiving the expected TX timestamp after transmitting a packet, causing it to enter a failure state. The problem appears to be due to the way that the driver pads packets which are smaller than the Ethernet minimum of 60 bytes. If headroom space was available in the SKB, this caused the driver to move the data back to utilize it. However, this appears to cause other data references in the SKB to become inconsistent. In particular, this caused the ptp_one_step_sync function to later (in the TX completion path) falsely detect the packet as a one-step SYNC packet, even when it was not, which caused the TX timestamp to not be processed when it should be. Using the headroom for this purpose seems like an unnecessary complexity as this is not a hot path in the driver, and in most cases it appears that there is sufficient tailroom to not require using the headroom anyway. Remove this usage of headroom to prevent this inconsistency from occurring and causing other problems. Fixes: 653e92a9175e ("net: macb: add support for padding and fcs computation") Signed-off-by: Robert Hancock Reviewed-by: Jacob Keller Tested-by: Claudiu Beznea # on SAMA7G5 Reviewed-by: Claudiu Beznea Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cadence/macb_main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 792c8147c2c4..e0d62e251387 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1963,7 +1963,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) bool cloned = skb_cloned(*skb) || skb_header_cloned(*skb) || skb_is_nonlinear(*skb); int padlen = ETH_ZLEN - (*skb)->len; - int headroom = skb_headroom(*skb); int tailroom = skb_tailroom(*skb); struct sk_buff *nskb; u32 fcs; @@ -1977,9 +1976,6 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) /* FCS could be appeded to tailroom. */ if (tailroom >= ETH_FCS_LEN) goto add_fcs; - /* FCS could be appeded by moving data to headroom. */ - else if (!cloned && headroom + tailroom >= ETH_FCS_LEN) - padlen = 0; /* No room for FCS, need to reallocate skb. */ else padlen = ETH_FCS_LEN; @@ -1988,10 +1984,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev) padlen += ETH_FCS_LEN; } - if (!cloned && headroom + tailroom >= padlen) { - (*skb)->data = memmove((*skb)->head, (*skb)->data, (*skb)->len); - skb_set_tail_pointer(*skb, (*skb)->len); - } else { + if (cloned || tailroom < padlen) { nskb = skb_copy_expand(*skb, 0, padlen, GFP_ATOMIC); if (!nskb) return -ENOMEM; From 16791d5a7a9ae5d0010fc751d2da6c57770889f4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Jan 2023 11:01:31 +0000 Subject: [PATCH 050/143] l2tp: prevent lockdep issue in l2tp_tunnel_register() [ Upstream commit b9fb10d131b8c84af9bb14e2078d5c63600c7dea ] lockdep complains with the following lock/unlock sequence: lock_sock(sk); write_lock_bh(&sk->sk_callback_lock); [1] release_sock(sk); [2] write_unlock_bh(&sk->sk_callback_lock); We need to swap [1] and [2] to fix this issue. Fixes: 0b2c59720e65 ("l2tp: close all race conditions in l2tp_tunnel_register()") Reported-by: syzbot+bbd35b345c7cab0d9a08@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/netdev/20230114030137.672706-1-xiyou.wangcong@gmail.com/T/#m1164ff20628671b0f326a24cb106ab3239c70ce3 Cc: Cong Wang Cc: Guillaume Nault Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 386510a93696..a4b793d1b7d7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1485,10 +1485,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, lock_sock(sk); write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); - if (ret < 0) { - release_sock(sk); + if (ret < 0) goto err_inval_sock; - } rcu_assign_sk_user_data(sk, tunnel); write_unlock_bh(&sk->sk_callback_lock); @@ -1525,6 +1523,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, err_inval_sock: write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); if (tunnel->fd < 0) sock_release(sock); From 28fc6095da22dc88433d79578ae1c495ebe8ca43 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Wed, 11 Jan 2023 18:12:16 +0000 Subject: [PATCH 051/143] HID: betop: check shape of output reports [ Upstream commit 3782c0d6edf658b71354a64d60aa7a296188fc90 ] betopff_init() only checks the total sum of the report counts for each report field to be at least 4, but hid_betopff_play() expects 4 report fields. A device advertising an output report with one field and 4 report counts would pass the check but crash the kernel with a NULL pointer dereference in hid_betopff_play(). Fixes: 52cd7785f3cd ("HID: betop: add drivers/hid/hid-betopff.c") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-betopff.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c index 467d789f9bc2..25ed7b9a917e 100644 --- a/drivers/hid/hid-betopff.c +++ b/drivers/hid/hid-betopff.c @@ -60,7 +60,6 @@ static int betopff_init(struct hid_device *hid) struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct input_dev *dev; - int field_count = 0; int error; int i, j; @@ -86,19 +85,21 @@ static int betopff_init(struct hid_device *hid) * ----------------------------------------- * Do init them with default value. */ + if (report->maxfield < 4) { + hid_err(hid, "not enough fields in the report: %d\n", + report->maxfield); + return -ENODEV; + } for (i = 0; i < report->maxfield; i++) { + if (report->field[i]->report_count < 1) { + hid_err(hid, "no values in the field\n"); + return -ENODEV; + } for (j = 0; j < report->field[i]->report_count; j++) { report->field[i]->value[j] = 0x00; - field_count++; } } - if (field_count < 4) { - hid_err(hid, "not enough fields in the report: %d\n", - field_count); - return -ENODEV; - } - betopff = kzalloc(sizeof(*betopff), GFP_KERNEL); if (!betopff) return -ENOMEM; From 98519ed6911357cafd8572e6945c5e1240bea73f Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 22 Nov 2022 10:16:12 +0800 Subject: [PATCH 052/143] dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node() [ Upstream commit 596b53ccc36a546ab28e8897315c5b4d1d5a0200 ] Since for_each_child_of_node() will increase the refcount of node, we need to call of_node_put() manually when breaking out of the iteration. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Liu Shixin Acked-by: Peter Korsgaard Link: https://lore.kernel.org/r/20221122021612.1908866-1-liushixin2@huawei.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index e76adc31ab66..12ad4bb3c5f2 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3119,8 +3119,10 @@ static int xilinx_dma_probe(struct platform_device *pdev) /* Initialize the channels */ for_each_child_of_node(node, child) { err = xilinx_dma_child_probe(xdev, child); - if (err < 0) + if (err < 0) { + of_node_put(child); goto error; + } } if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) { From 5f10f7efe0fc97c0ee2112a1032914f6fb2f940c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 18 Jan 2023 08:44:16 -0800 Subject: [PATCH 053/143] nvme-pci: fix timeout request state check [ Upstream commit 1c5842085851f786eba24a39ecd02650ad892064 ] Polling the completion can progress the request state to IDLE, either inline with the completion, or through softirq. Either way, the state may not be COMPLETED, so don't check for that. We only care if the state isn't IN_FLIGHT. This is fixing an issue where the driver aborts an IO that we just completed. Seeing the "aborting" message instead of "polled" is very misleading as to where the timeout problem resides. Fixes: bf392a5dc02a9b ("nvme-pci: Remove tag from process cq") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 67dd68462b81..c47512da9872 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1292,7 +1292,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) else nvme_poll_irqdisable(nvmeq); - if (blk_mq_request_completed(req)) { + if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid); From ddf16dae65d386eee5117f42c613422804701aef Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 18 Jan 2023 09:59:41 +0800 Subject: [PATCH 054/143] tcp: avoid the lookup process failing to get sk in ehash table [ Upstream commit 3f4ca5fafc08881d7a57daa20449d171f2887043 ] While one cpu is working on looking up the right socket from ehash table, another cpu is done deleting the request socket and is about to add (or is adding) the big socket from the table. It means that we could miss both of them, even though it has little chance. Let me draw a call trace map of the server side. CPU 0 CPU 1 ----- ----- tcp_v4_rcv() syn_recv_sock() inet_ehash_insert() -> sk_nulls_del_node_init_rcu(osk) __inet_lookup_established() -> __sk_nulls_add_node_rcu(sk, list) Notice that the CPU 0 is receiving the data after the final ack during 3-way shakehands and CPU 1 is still handling the final ack. Why could this be a real problem? This case is happening only when the final ack and the first data receiving by different CPUs. Then the server receiving data with ACK flag tries to search one proper established socket from ehash table, but apparently it fails as my map shows above. After that, the server fetches a listener socket and then sends a RST because it finds a ACK flag in the skb (data), which obeys RST definition in RFC 793. Besides, Eric pointed out there's one more race condition where it handles tw socket hashdance. Only by adding to the tail of the list before deleting the old one can we avoid the race if the reader has already begun the bucket traversal and it would possibly miss the head. Many thanks to Eric for great help from beginning to end. Fixes: 5e0724d027f0 ("tcp/dccp: fix hashdance race for passive sessions") Suggested-by: Eric Dumazet Signed-off-by: Jason Xing Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/lkml/20230112065336.41034-1-kerneljasonxing@gmail.com/ Link: https://lore.kernel.org/r/20230118015941.1313-1-kerneljasonxing@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 17 +++++++++++++++-- net/ipv4/inet_timewait_sock.c | 8 ++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index c68a1dae25ca..2615b72118d1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -571,8 +571,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_nulls_del_node_init_rcu(osk); - } else if (found_dup_sk) { + ret = sk_hashed(osk); + if (ret) { + /* Before deleting the node, we insert a new one to make + * sure that the look-up-sk process would not miss either + * of them and that at least one node would exist in ehash + * table all the time. Otherwise there's a tiny chance + * that lookup process could find nothing in ehash table. + */ + __sk_nulls_add_node_tail_rcu(sk, list); + sk_nulls_del_node_init_rcu(osk); + } + goto unlock; + } + if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -581,6 +593,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); +unlock: spin_unlock(lock); return ret; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index c411c87ae865..a00102d7c7fd 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -81,10 +81,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, - struct hlist_nulls_head *list) +static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&tw->tw_node, list); + hlist_nulls_add_tail_rcu(&tw->tw_node, list); } static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -120,7 +120,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - inet_twsk_add_node_rcu(tw, &ehead->chain); + inet_twsk_add_node_tail_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) From 8e5be0ae5506de89dc9683075cdc855457dfb680 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 5 Dec 2022 16:04:34 +0800 Subject: [PATCH 055/143] w1: fix deadloop in __w1_remove_master_device() [ Upstream commit 25d5648802f12ae486076ceca5d7ddf1fef792b2 ] I got a deadloop report while doing device(ds2482) add/remove test: [ 162.241881] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. [ 163.272251] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. [ 164.296157] w1_master_driver w1_bus_master1: Waiting for w1_bus_master1 to become free: refcnt=1. ... __w1_remove_master_device() can't return, because the dev->refcnt is not zero. w1_add_master_device() | w1_alloc_dev() | atomic_set(&dev->refcnt, 2) | kthread_run() | |__w1_remove_master_device() | kthread_stop() // KTHREAD_SHOULD_STOP is set, | // threadfn(w1_process) won't be | // called. | kthread() | | // refcnt will never be 0, it's deadloop. | while (atomic_read(&dev->refcnt)) {...} After calling w1_add_master_device(), w1_process() is not really invoked, before w1_process() starting, if kthread_stop() is called in __w1_remove_master_device(), w1_process() will never be called, the refcnt can not be decreased, then it causes deadloop in remove function because of non-zero refcnt. We need to make sure w1_process() is really started, so move the set refcnt into w1_process() to fix this problem. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221205080434.3149205-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/w1/w1.c | 2 ++ drivers/w1/w1_int.c | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 15a2ee32f116..6e9f3569971d 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1131,6 +1131,8 @@ int w1_process(void *data) /* remainder if it woke up early */ unsigned long jremain = 0; + atomic_inc(&dev->refcnt); + for (;;) { if (!jremain && dev->search_count) { diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b3e1792d9c49..3a71c5eb2f83 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -51,10 +51,9 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->search_count = w1_search_count; dev->enable_pullup = w1_enable_pullup; - /* 1 for w1_process to decrement - * 1 for __w1_remove_master_device to decrement + /* For __w1_remove_master_device to decrement */ - atomic_set(&dev->refcnt, 2); + atomic_set(&dev->refcnt, 1); INIT_LIST_HEAD(&dev->slist); INIT_LIST_HEAD(&dev->async_list); From 216f35db6ec6a667cd9db4838d657c1d2f4684da Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 5 Dec 2022 18:15:58 +0800 Subject: [PATCH 056/143] w1: fix WARNING after calling w1_process() [ Upstream commit 36225a7c72e9e3e1ce4001b6ce72849f5c9a2d3b ] I got the following WARNING message while removing driver(ds2482): ------------[ cut here ]------------ do not call blocking ops when !TASK_RUNNING; state=1 set at [<000000002d50bfb6>] w1_process+0x9e/0x1d0 [wire] WARNING: CPU: 0 PID: 262 at kernel/sched/core.c:9817 __might_sleep+0x98/0xa0 CPU: 0 PID: 262 Comm: w1_bus_master1 Tainted: G N 6.1.0-rc3+ #307 RIP: 0010:__might_sleep+0x98/0xa0 Call Trace: exit_signals+0x6c/0x550 do_exit+0x2b4/0x17e0 kthread_exit+0x52/0x60 kthread+0x16d/0x1e0 ret_from_fork+0x1f/0x30 The state of task is set to TASK_INTERRUPTIBLE in loop in w1_process(), set it to TASK_RUNNING when it breaks out of the loop to avoid the warning. Fixes: 3c52e4e62789 ("W1: w1_process, block or sleep") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221205101558.3599162-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/w1/w1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 6e9f3569971d..15842377c8d2 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1160,8 +1160,10 @@ int w1_process(void *data) */ mutex_unlock(&dev->list_mutex); - if (kthread_should_stop()) + if (kthread_should_stop()) { + __set_current_state(TASK_RUNNING); break; + } /* Only sleep when the search is active. */ if (dev->search_count) { From 704a423c9379a27edf14a88313f1c26ff7a3dc04 Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 25 Nov 2022 14:35:41 +0800 Subject: [PATCH 057/143] driver core: Fix test_async_probe_init saves device in wrong array [ Upstream commit 9be182da0a7526f1b9a3777a336f83baa2e64d23 ] In test_async_probe_init, second set of asynchronous devices are saved in sync_dev[sync_id], which should be async_dev[async_id]. This makes these devices not unregistered when exit. > modprobe test_async_driver_probe && \ > modprobe -r test_async_driver_probe && \ > modprobe test_async_driver_probe ... > sysfs: cannot create duplicate filename '/devices/platform/test_async_driver.4' > kobject_add_internal failed for test_async_driver.4 with -EEXIST, don't try to register things with the same name in the same directory. Fixes: 57ea974fb871 ("driver core: Rewrite test_async_driver_probe to cover serialization and NUMA affinity") Signed-off-by: Chen Zhongjin Link: https://lore.kernel.org/r/20221125063541.241328-1-chenzhongjin@huawei.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/test/test_async_driver_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c index 3bb7beb127a9..c157a912d673 100644 --- a/drivers/base/test/test_async_driver_probe.c +++ b/drivers/base/test/test_async_driver_probe.c @@ -146,7 +146,7 @@ static int __init test_async_probe_init(void) calltime = ktime_get(); for_each_online_cpu(cpu) { nid = cpu_to_node(cpu); - pdev = &sync_dev[sync_id]; + pdev = &async_dev[async_id]; *pdev = test_platform_device_register_node("test_async_driver", async_id, From a84240df70793d53d43ba8569e399500712c3651 Mon Sep 17 00:00:00 2001 From: Rakesh Sankaranarayanan Date: Wed, 18 Jan 2023 23:17:35 +0530 Subject: [PATCH 058/143] net: dsa: microchip: ksz9477: port map correction in ALU table entry register [ Upstream commit 6c977c5c2e4c5d8ad1b604724cc344e38f96fe9b ] ALU table entry 2 register in KSZ9477 have bit positions reserved for forwarding port map. This field is referred in ksz9477_fdb_del() for clearing forward port map and alu table. But current fdb_del refer ALU table entry 3 register for accessing forward port map. Update ksz9477_fdb_del() to get forward port map from correct alu table entry register. With this bug, issue can be observed while deleting static MAC entries. Delete any specific MAC entry using "bridge fdb del" command. This should clear all the specified MAC entries. But it is observed that entries with self static alone are retained. Tested on LAN9370 EVB since ksz9477_fdb_del() is used common across LAN937x and KSZ series. Fixes: b987e98e50ab ("dsa: add DSA switch driver for Microchip KSZ9477") Signed-off-by: Rakesh Sankaranarayanan Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230118174735.702377-1-rakesh.sankaranarayanan@microchip.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/microchip/ksz9477.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index ece4c0512ee2..f42f2f4e4b60 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -678,10 +678,10 @@ static int ksz9477_port_fdb_del(struct dsa_switch *ds, int port, ksz_read32(dev, REG_SW_ALU_VAL_D, &alu_table[3]); /* clear forwarding port */ - alu_table[2] &= ~BIT(port); + alu_table[1] &= ~BIT(port); /* if there is no port to forward, clear table */ - if ((alu_table[2] & ALU_V_PORT_MAP) == 0) { + if ((alu_table[1] & ALU_V_PORT_MAP) == 0) { alu_table[0] = 0; alu_table[1] = 0; alu_table[2] = 0; From d79e700680f9407abc8aa43e7bb758f94ccfecbf Mon Sep 17 00:00:00 2001 From: David Morley Date: Thu, 19 Jan 2023 19:00:28 +0000 Subject: [PATCH 059/143] tcp: fix rate_app_limited to default to 1 [ Upstream commit 300b655db1b5152d6101bcb6801d50899b20c2d6 ] The initial default value of 0 for tp->rate_app_limited was incorrect, since a flow is indeed application-limited until it first sends data. Fixing the default to be 1 is generally correct but also specifically will help user-space applications avoid using the initial tcpi_delivery_rate value of 0 that persists until the connection has some non-zero bandwidth sample. Fixes: eb8329e0a04d ("tcp: export data delivery rate") Suggested-by: Yuchung Cheng Signed-off-by: David Morley Signed-off-by: Neal Cardwell Tested-by: David Morley Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc588bc2b11d..6a0560a735ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -432,6 +432,7 @@ void tcp_init_sock(struct sock *sk) /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; + tp->rate_app_limited = 1; /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. @@ -2837,6 +2838,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->last_oow_ack_time = 0; /* There's a bubble in the pipe until at least the first ACK. */ tp->app_limited = ~0U; + tp->rate_app_limited = 1; tp->rack.mstamp = 0; tp->rack.advanced = 0; tp->rack.reo_wnd_steps = 1; From 6bc564f3fec0a8ae4a26ec43a21001a2742d5d2e Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Sat, 26 Nov 2022 09:07:52 +0800 Subject: [PATCH 060/143] scsi: iscsi: Fix multiple iSCSI session unbind events sent to userspace [ Upstream commit a3be19b91ea7121d388084e8c07f5b1b982eb40c ] It was observed that the kernel would potentially send ISCSI_KEVENT_UNBIND_SESSION multiple times. Introduce 'target_state' in iscsi_cls_session() to make sure session will send only one unbind session event. This introduces a regression wrt. the issue fixed in commit 13e60d3ba287 ("scsi: iscsi: Report unbind session event when the target has been removed"). If iscsid dies for any reason after sending an unbind session to kernel, once iscsid is restarted, the kernel's ISCSI_KEVENT_UNBIND_SESSION event is lost and userspace is then unable to logout. However, the session is actually in invalid state (its target_id is INVALID) so iscsid should not sync this session during restart. Consequently we need to check the session's target state during iscsid restart. If session is in unbound state, do not sync this session and perform session teardown. This is OK because once a session is unbound, we can not recover it any more (mainly because its target id is INVALID). Signed-off-by: Wenchao Hao Link: https://lore.kernel.org/r/20221126010752.231917-1-haowenchao@huawei.com Reviewed-by: Mike Christie Reviewed-by: Wu Bo Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 50 ++++++++++++++++++++++++++--- include/scsi/scsi_transport_iscsi.h | 9 ++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index ef7cd7520e7c..092bd6a3d64a 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1674,6 +1674,13 @@ static const char *iscsi_session_state_name(int state) return name; } +static char *iscsi_session_target_state_name[] = { + [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", + [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", + [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", + [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", +}; + int iscsi_session_chkready(struct iscsi_cls_session *session) { unsigned long flags; @@ -1805,9 +1812,13 @@ static int iscsi_user_scan_session(struct device *dev, void *data) if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || - scan_data->id == id)) + scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_SCANNED; + spin_unlock_irqrestore(&session->lock, flags); + } } user_scan_exit: @@ -1996,31 +2007,41 @@ static void __iscsi_unbind_session(struct work_struct *work) struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; + bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); - if (session->target_id == ISCSI_MAX_TARGET) { + if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { + remove_target = false; + } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - goto unbind_session_exit; + ISCSI_DBG_TRANS_SESSION(session, + "Skipping target unbinding: Session is unbound/unbinding.\n"); + return; } + session->target_state = ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); - scsi_remove_target(&session->dev); + if (remove_target) + scsi_remove_target(&session->dev); if (session->ida_used) ida_simple_remove(&iscsi_sess_ida, target_id); -unbind_session_exit: iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); + + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_UNBOUND; + spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) @@ -2089,6 +2110,9 @@ int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) session->ida_used = true; } else session->target_id = target_id; + spin_lock_irqsave(&session->lock, flags); + session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; + spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); @@ -4343,6 +4367,19 @@ iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0); iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); +static ssize_t +show_priv_session_target_state(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + + return sysfs_emit(buf, "%s\n", + iscsi_session_target_state_name[session->target_state]); +} + +static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, + show_priv_session_target_state, NULL); + static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) @@ -4445,6 +4482,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, + &dev_attr_priv_sess_target_state.attr, &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, @@ -4558,6 +4596,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_state.attr) + return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 037c77fb5dc5..c4de15f7a0a5 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -236,6 +236,14 @@ enum { ISCSI_SESSION_FREE, }; +enum { + ISCSI_SESSION_TARGET_UNBOUND, + ISCSI_SESSION_TARGET_ALLOCATED, + ISCSI_SESSION_TARGET_SCANNED, + ISCSI_SESSION_TARGET_UNBINDING, + ISCSI_SESSION_TARGET_MAX, +}; + #define ISCSI_MAX_TARGET -1 struct iscsi_cls_session { @@ -262,6 +270,7 @@ struct iscsi_cls_session { */ pid_t creator; int state; + int target_state; /* session target bind state */ int sid; /* session id */ void *dd_data; /* LLD private data */ struct device dev; /* sysfs transport/container device */ From b75e9fc402c404f704e20fb7ae535a2e06ab9f5f Mon Sep 17 00:00:00 2001 From: Sumit Gupta Date: Tue, 20 Dec 2022 21:32:37 +0530 Subject: [PATCH 061/143] cpufreq: Add Tegra234 to cpufreq-dt-platdev blocklist [ Upstream commit 01c5bb0cc2a39fbc56ff9a5ef28b79447f0c2351 ] Tegra234 platform uses the tegra194-cpufreq driver, so add it to the blocklist in cpufreq-dt-platdev driver to avoid the cpufreq driver registration from there. Signed-off-by: Sumit Gupta Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index a3734014db47..aea285651fba 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -130,6 +130,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "nvidia,tegra30", }, { .compatible = "nvidia,tegra124", }, { .compatible = "nvidia,tegra210", }, + { .compatible = "nvidia,tegra234", }, { .compatible = "qcom,apq8096", }, { .compatible = "qcom,msm8996", }, From 3b154d5204ff8aa82a2bd4d5ed29a7cf80723706 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 22 Dec 2022 23:28:21 -0800 Subject: [PATCH 062/143] kcsan: test: don't put the expect array on the stack [ Upstream commit 5b24ac2dfd3eb3e36f794af3aa7f2828b19035bd ] Size of the 'expect' array in the __report_matches is 1536 bytes, which is exactly the default frame size warning limit of the xtensa architecture. As a result allmodconfig xtensa kernel builds with the gcc that does not support the compiler plugins (which otherwise would push the said warning limit to 2K) fail with the following message: kernel/kcsan/kcsan_test.c:257:1: error: the frame size of 1680 bytes is larger than 1536 bytes Fix it by dynamically allocating the 'expect' array. Signed-off-by: Max Filippov Reviewed-by: Marco Elver Tested-by: Marco Elver Signed-off-by: Sasha Levin --- kernel/kcsan/kcsan-test.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c index ebe7fd245104..8a8ccaf4f38f 100644 --- a/kernel/kcsan/kcsan-test.c +++ b/kernel/kcsan/kcsan-test.c @@ -149,7 +149,7 @@ static bool report_matches(const struct expect_report *r) const bool is_assert = (r->access[0].type | r->access[1].type) & KCSAN_ACCESS_ASSERT; bool ret = false; unsigned long flags; - typeof(observed.lines) expect; + typeof(*observed.lines) *expect; const char *end; char *cur; int i; @@ -158,6 +158,10 @@ static bool report_matches(const struct expect_report *r) if (!report_available()) return false; + expect = kmalloc(sizeof(observed.lines), GFP_KERNEL); + if (WARN_ON(!expect)) + return false; + /* Generate expected report contents. */ /* Title */ @@ -241,6 +245,7 @@ static bool report_matches(const struct expect_report *r) strstr(observed.lines[2], expect[1]))); out: spin_unlock_irqrestore(&observed.lock, flags); + kfree(expect); return ret; } From 96f4899a3810053f45779e59cfe4b161f6a69c3d Mon Sep 17 00:00:00 2001 From: Chancel Liu Date: Wed, 4 Jan 2023 10:57:54 +0800 Subject: [PATCH 063/143] ASoC: fsl_micfil: Correct the number of steps on SX controls [ Upstream commit cdfa92eb90f5770b26a79824ef213ebdbbd988b1 ] The parameter "max" of SOC_SINGLE_SX_TLV() means the number of steps rather than maximum value. This patch corrects the minimum value to -8 and the number of steps to 15. Signed-off-by: Chancel Liu Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20230104025754.3019235-1-chancel.liu@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_micfil.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 6c794605e33c..97f83c63e765 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -87,21 +87,21 @@ static DECLARE_TLV_DB_SCALE(gain_tlv, 0, 100, 0); static const struct snd_kcontrol_new fsl_micfil_snd_controls[] = { SOC_SINGLE_SX_TLV("CH0 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(0), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(0), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH1 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(1), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(1), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH2 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(2), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(2), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH3 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(3), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(3), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH4 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(4), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(4), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH5 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(5), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(5), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH6 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(6), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(6), 0x8, 0xF, gain_tlv), SOC_SINGLE_SX_TLV("CH7 Volume", REG_MICFIL_OUT_CTRL, - MICFIL_OUTGAIN_CHX_SHIFT(7), 0xF, 0x7, gain_tlv), + MICFIL_OUTGAIN_CHX_SHIFT(7), 0x8, 0xF, gain_tlv), SOC_ENUM_EXT("MICFIL Quality Select", fsl_micfil_quality_enum, snd_soc_get_enum_double, snd_soc_put_enum_double), From ae108a5fc9298ba273df4a070b07392e852a481f Mon Sep 17 00:00:00 2001 From: Patrick Thompson Date: Tue, 20 Dec 2022 15:58:26 -0500 Subject: [PATCH 064/143] drm: Add orientation quirk for Lenovo ideapad D330-10IGL [ Upstream commit 0688773f0710528e1ab302c3d6317e269f2e2e6e ] Panel is 800x1280 but mounted on a detachable form factor sideways. Signed-off-by: Patrick Thompson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20221220205826.178008-1-ptf@google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_panel_orientation_quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index ca0fefeaab20..ce739ba45c55 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -272,6 +272,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Ideapad D330-10IGL (HD) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGL"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* Lenovo Yoga Book X90F / X91F / X91L */ .matches = { /* Non exact match to match all versions */ From 2ca345d19cbd694ed9ea086e5b69289c22750a20 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Tue, 3 Jan 2023 15:11:07 +0100 Subject: [PATCH 065/143] s390/debug: add _ASM_S390_ prefix to header guard [ Upstream commit 0d4d52361b6c29bf771acd4fa461f06d78fb2fac ] Using DEBUG_H without a prefix is very generic and inconsistent with other header guards in arch/s390/include/asm. In fact it collides with the same name in the ath9k wireless driver though that depends on !S390 via disabled wireless support. Let's just use a consistent header guard name and prevent possible future trouble. Signed-off-by: Niklas Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/include/asm/debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index c1b82bcc017c..29a1badbe2f5 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 1999, 2020 */ -#ifndef DEBUG_H -#define DEBUG_H +#ifndef _ASM_S390_DEBUG_H +#define _ASM_S390_DEBUG_H #include #include @@ -425,4 +425,4 @@ int debug_unregister_view(debug_info_t *id, struct debug_view *view); #define PRINT_FATAL(x...) printk(KERN_DEBUG PRINTK_HEADER x) #endif /* DASD_DEBUG */ -#endif /* DEBUG_H */ +#endif /* _ASM_S390_DEBUG_H */ From 00f23016118781f85d7ffda2dccf9a43de6f1c98 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 10 Jan 2023 11:12:52 +0800 Subject: [PATCH 066/143] cpufreq: armada-37xx: stop using 0 as NULL pointer [ Upstream commit 08f0adb193c008de640fde34a2e00a666c01d77c ] Use NULL for NULL pointer to fix the following sparse warning: drivers/cpufreq/armada-37xx-cpufreq.c:448:32: sparse: warning: Using plain integer as NULL pointer Signed-off-by: Miles Chen Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/armada-37xx-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 2de7fd18f66a..f0be8a43ec49 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -443,7 +443,7 @@ static int __init armada37xx_cpufreq_driver_init(void) return -ENODEV; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { dev_err(cpu_dev, "Cannot get clock for CPU0\n"); return PTR_ERR(clk); From d9a0752a6a1126e30dc4c68f310b4a5050518583 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 6 Jan 2023 23:15:06 +0000 Subject: [PATCH 067/143] ASoC: fsl_ssi: Rename AC'97 streams to avoid collisions with AC'97 CODEC [ Upstream commit 8c6a42b5b0ed6f96624f56954e93eeae107440a6 ] The SSI driver calls the AC'97 playback and transmit streams "AC97 Playback" and "AC97 Capture" respectively. This is the same name used by the generic AC'97 CODEC driver in ASoC, creating confusion for the Freescale ASoC card when it attempts to use these widgets in routing. Add a "CPU" in the name like the regular DAIs registered by the driver to disambiguate. Acked-by: Shengjiu Wang Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230106-asoc-udoo-probe-v1-1-a5d7469d4f67@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl-asoc-card.c | 8 ++++---- sound/soc/fsl/fsl_ssi.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 7cd14d6b9436..8c976fde44f0 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -117,11 +117,11 @@ static const struct snd_soc_dapm_route audio_map[] = { static const struct snd_soc_dapm_route audio_map_ac97[] = { /* 1st half -- Normal DAPM routes */ - {"Playback", NULL, "AC97 Playback"}, - {"AC97 Capture", NULL, "Capture"}, + {"Playback", NULL, "CPU AC97 Playback"}, + {"CPU AC97 Capture", NULL, "Capture"}, /* 2nd half -- ASRC DAPM routes */ - {"AC97 Playback", NULL, "ASRC-Playback"}, - {"ASRC-Capture", NULL, "AC97 Capture"}, + {"CPU AC97 Playback", NULL, "ASRC-Playback"}, + {"ASRC-Capture", NULL, "CPU AC97 Capture"}, }; static const struct snd_soc_dapm_route audio_map_tx[] = { diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 1d774c876c52..94229ce1a30e 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -1161,14 +1161,14 @@ static struct snd_soc_dai_driver fsl_ssi_ac97_dai = { .symmetric_channels = 1, .probe = fsl_ssi_dai_probe, .playback = { - .stream_name = "AC97 Playback", + .stream_name = "CPU AC97 Playback", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_48000, .formats = SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S20, }, .capture = { - .stream_name = "AC97 Capture", + .stream_name = "CPU AC97 Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_48000, From a84def9b108a44bba2d6afc800302f977738f94f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 6 Jan 2023 23:15:07 +0000 Subject: [PATCH 068/143] ASoC: fsl-asoc-card: Fix naming of AC'97 CODEC widgets [ Upstream commit 242fc66ae6e1e2b8519daacc7590a73cd0e8a6e4 ] The fsl-asoc-card AC'97 support currently tries to route to Playback and Capture widgets provided by the AC'97 CODEC. This doesn't work since the generic AC'97 driver registers with an "AC97" at the front of the stream and hence widget names, update to reflect reality. It's not clear to me if or how this ever worked. Acked-by: Shengjiu Wang Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20230106-asoc-udoo-probe-v1-2-a5d7469d4f67@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl-asoc-card.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index 8c976fde44f0..9a756d0a6032 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -117,8 +117,8 @@ static const struct snd_soc_dapm_route audio_map[] = { static const struct snd_soc_dapm_route audio_map_ac97[] = { /* 1st half -- Normal DAPM routes */ - {"Playback", NULL, "CPU AC97 Playback"}, - {"CPU AC97 Capture", NULL, "Capture"}, + {"AC97 Playback", NULL, "CPU AC97 Playback"}, + {"CPU AC97 Capture", NULL, "AC97 Capture"}, /* 2nd half -- ASRC DAPM routes */ {"CPU AC97 Playback", NULL, "ASRC-Playback"}, {"ASRC-Capture", NULL, "CPU AC97 Capture"}, From 300da569a12808adb7acd0dc798f0437a8423fa7 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 6 Jan 2023 11:07:19 +0100 Subject: [PATCH 069/143] spi: spidev: remove debug messages that access spidev->spi without locking [ Upstream commit 6b35b173dbc1711f8d272e3f322d2ad697015919 ] The two debug messages in spidev_open() dereference spidev->spi without taking the lock and without checking if it's not null. This can lead to a crash. Drop the messages as they're not needed - the user-space will get informed about ENOMEM with the syscall return value. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230106100719.196243-2-brgl@bgdev.pl Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spidev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 9c5ec99431d2..aee960a7d7f9 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -592,7 +592,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->tx_buffer) { spidev->tx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->tx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_find_dev; } @@ -601,7 +600,6 @@ static int spidev_open(struct inode *inode, struct file *filp) if (!spidev->rx_buffer) { spidev->rx_buffer = kmalloc(bufsiz, GFP_KERNEL); if (!spidev->rx_buffer) { - dev_dbg(&spidev->spi->dev, "open/ENOMEM\n"); status = -ENOMEM; goto err_alloc_rx_buf; } From 71bd134c4e9e2a12f4e210badb0b253dfee3712d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Jan 2023 15:54:56 +0100 Subject: [PATCH 070/143] KVM: s390: interrupt: use READ_ONCE() before cmpxchg() [ Upstream commit 42400d99e9f0728c17240edb9645637ead40f6b9 ] Use READ_ONCE() before cmpxchg() to prevent that the compiler generates code that fetches the to be compared old value several times from memory. Reviewed-by: Christian Borntraeger Acked-by: Christian Borntraeger Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20230109145456.2895385-1-hca@linux.ibm.com Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kvm/interrupt.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index b51ab19eb972..64d1dfe6dca5 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -81,8 +81,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union esca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -93,8 +94,9 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + union bsca_sigp_ctrl new_val = {0}, old_val; + old_val = READ_ONCE(*sigp_ctrl); new_val.scn = src_id; new_val.c = 1; old_val.c = 0; @@ -124,16 +126,18 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union esca_sigp_ctrl old = *sigp_ctrl; + union esca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl old = *sigp_ctrl; + union bsca_sigp_ctrl old; + old = READ_ONCE(*sigp_ctrl); expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } From 52249c2168af44f56a7fd914661599a0d1409a6e Mon Sep 17 00:00:00 2001 From: Yihang Li Date: Wed, 4 Jan 2023 12:03:20 +0800 Subject: [PATCH 071/143] scsi: hisi_sas: Set a port invalid only if there are no devices attached when refreshing port id [ Upstream commit f58c89700630da6554b24fd3df293a24874c10c1 ] Currently the driver sets the port invalid if one phy in the port is not enabled, which may cause issues in expander situation. In directly attached situation, if phy up doesn't occur in time when refreshing port id, the port is incorrectly set to invalid which will also cause disk lost. Therefore set a port invalid only if there are no devices attached to the port. Signed-off-by: Yihang Li Signed-off-by: Xiang Chen Link: https://lore.kernel.org/r/1672805000-141102-3-git-send-email-chenxiang66@hisilicon.com Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hisi_sas/hisi_sas_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 1feca45384c7..e5b9229310a0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1408,7 +1408,7 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) device->linkrate = phy->sas_phy.linkrate; hisi_hba->hw->setup_itct(hisi_hba, sas_dev); - } else + } else if (!port->port_attached) port->id = 0xff; } } From 9968f9a86251d697c223031be2ae3d3f11eb0fd9 Mon Sep 17 00:00:00 2001 From: Michael Klein Date: Tue, 20 Dec 2022 13:11:03 +0100 Subject: [PATCH 072/143] platform/x86: touchscreen_dmi: Add info for the CSL Panther Tab HD [ Upstream commit 36c2b9d6710427f802494ba070621cb415198293 ] Add touchscreen info for the CSL Panther Tab HD. Signed-off-by: Michael Klein Link: https://lore.kernel.org/r/20221220121103.uiwn5l7fii2iggct@LLGMVZLB-0037 Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 110ff1e6ef81..bc26acace2c3 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -255,6 +255,23 @@ static const struct ts_dmi_data connect_tablet9_data = { .properties = connect_tablet9_props, }; +static const struct property_entry csl_panther_tab_hd_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 1), + PROPERTY_ENTRY_U32("touchscreen-min-y", 20), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1980), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1526), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data csl_panther_tab_hd_data = { + .acpi_name = "MSSL1680:00", + .properties = csl_panther_tab_hd_props, +}; + static const struct property_entry cube_iwork8_air_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 1), PROPERTY_ENTRY_U32("touchscreen-min-y", 3), @@ -1057,6 +1074,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Tablet 9"), }, }, + { + /* CSL Panther Tab HD */ + .driver_data = (void *)&csl_panther_tab_hd_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CSL Computer GmbH & Co. KG"), + DMI_MATCH(DMI_PRODUCT_NAME, "CSL Panther Tab HD"), + }, + }, { /* CUBE iwork8 Air */ .driver_data = (void *)&cube_iwork8_air_data, From f8ddf7dbf5e79fd3a5e890b6e45e9c51d6397a9e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Jan 2023 19:18:41 +0100 Subject: [PATCH 073/143] platform/x86: asus-nb-wmi: Add alternate mapping for KEY_SCREENLOCK [ Upstream commit db9494895b405bf318dc7e563dee6daa51b3b6ed ] The 0x33 keycode is emitted by Fn + F6 on a ASUS FX705GE laptop. Reported-by: Nemcev Aleksey Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230112181841.84652-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/platform/x86/asus-nb-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 949ddeb673bc..74637bd0433e 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -478,6 +478,7 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, + { KE_KEY, 0x33, { KEY_SCREENLOCK } }, { KE_KEY, 0x35, { KEY_SCREENLOCK } }, { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, From 20a02bc845083abe5c7406caa9c6408ab0b2cc76 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 13 Jan 2023 19:44:47 +0100 Subject: [PATCH 074/143] lockref: stop doing cpu_relax in the cmpxchg loop [ Upstream commit f5fe24ef17b5fbe6db49534163e77499fb10ae8c ] On the x86-64 architecture even a failing cmpxchg grants exclusive access to the cacheline, making it preferable to retry the failed op immediately instead of stalling with the pause instruction. To illustrate the impact, below are benchmark results obtained by running various will-it-scale tests on top of the 6.2-rc3 kernel and Cascade Lake (2 sockets * 24 cores * 2 threads) CPU. All results in ops/s. Note there is some variance in re-runs, but the code is consistently faster when contention is present. open3 ("Same file open/close"): proc stock no-pause 1 805603 814942 (+%1) 2 1054980 1054781 (-0%) 8 1544802 1822858 (+18%) 24 1191064 2199665 (+84%) 48 851582 1469860 (+72%) 96 609481 1427170 (+134%) fstat2 ("Same file fstat"): proc stock no-pause 1 3013872 3047636 (+1%) 2 4284687 4400421 (+2%) 8 3257721 5530156 (+69%) 24 2239819 5466127 (+144%) 48 1701072 5256609 (+209%) 96 1269157 6649326 (+423%) Additionally, a kernel with a private patch to help access() scalability: access2 ("Same file access"): proc stock patched patched +nopause 24 2378041 2005501 5370335 (-15% / +125%) That is, fixing the problems in access itself *reduces* scalability after the cacheline ping-pong only happens in lockref with the pause instruction. Note that fstat and access benchmarks are not currently integrated into will-it-scale, but interested parties can find them in pull requests to said project. Code at hand has a rather tortured history. First modification showed up in commit d472d9d98b46 ("lockref: Relax in cmpxchg loop"), written with Itanium in mind. Later it got patched up to use an arch-dependent macro to stop doing it on s390 where it caused a significant regression. Said macro had undergone revisions and was ultimately eliminated later, going back to cpu_relax. While I intended to only remove cpu_relax for x86-64, I got the following comment from Linus: I would actually prefer just removing it entirely and see if somebody else hollers. You have the numbers to prove it hurts on real hardware, and I don't think we have any numbers to the contrary. So I think it's better to trust the numbers and remove it as a failure, than say "let's just remove it on x86-64 and leave everybody else with the potentially broken code" Additionally, Will Deacon (maintainer of the arm64 port, one of the architectures previously benchmarked): So, from the arm64 side of the fence, I'm perfectly happy just removing the cpu_relax() calls from lockref. As such, come back full circle in history and whack it altogether. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/all/CAGudoHHx0Nqg6DE70zAVA75eV-HXfWyhVMWZ-aSeOofkA_=WdA@mail.gmail.com/ Acked-by: Tony Luck # ia64 Acked-by: Nicholas Piggin # powerpc Acked-by: Will Deacon # arm64 Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/lockref.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/lockref.c b/lib/lockref.c index 5b34bbd3eba8..81ac5f355242 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -24,7 +24,6 @@ } \ if (!--retry) \ break; \ - cpu_relax(); \ } \ } while (0) From a7345145e7bdd7cfc4359e16a23ebde846c8ebf9 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 24 Jan 2023 07:02:31 -0500 Subject: [PATCH 075/143] Revert "selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID" This reverts commit 3093027183f2846365a76b2fb1e1309f1960084a. Signed-off-by: Sasha Levin --- .../bpf/prog_tests/jeq_infer_not_null.c | 9 ---- .../bpf/progs/jeq_infer_not_null_fail.c | 42 ------------------- 2 files changed, 51 deletions(-) delete mode 100644 tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c delete mode 100644 tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c deleted file mode 100644 index 3add34df5767..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include "jeq_infer_not_null_fail.skel.h" - -void test_jeq_infer_not_null(void) -{ - RUN_TESTS(jeq_infer_not_null_fail); -} diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c deleted file mode 100644 index f46965053acb..000000000000 --- a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include "vmlinux.h" -#include -#include "bpf_misc.h" - -char _license[] SEC("license") = "GPL"; - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 1); - __type(key, u64); - __type(value, u64); -} m_hash SEC(".maps"); - -SEC("?raw_tp") -__failure __msg("R8 invalid mem access 'map_value_or_null") -int jeq_infer_not_null_ptr_to_btfid(void *ctx) -{ - struct bpf_map *map = (struct bpf_map *)&m_hash; - struct bpf_map *inner_map = map->inner_map_meta; - u64 key = 0, ret = 0, *val; - - val = bpf_map_lookup_elem(map, &key); - /* Do not mark ptr as non-null if one of them is - * PTR_TO_BTF_ID (R9), reject because of invalid - * access to map value (R8). - * - * Here, we need to inline those insns to access - * R8 directly, since compiler may use other reg - * once it figures out val==inner_map. - */ - asm volatile("r8 = %[val];\n" - "r9 = %[inner_map];\n" - "if r8 != r9 goto +1;\n" - "%[ret] = *(u64 *)(r8 +0);\n" - : [ret] "+r"(ret) - : [inner_map] "r"(inner_map), [val] "r"(val) - : "r8", "r9"); - - return ret; -} From 5fb884d748e438f87ecca4694933a4fae3efe34b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 24 Jun 2021 12:36:42 +0200 Subject: [PATCH 076/143] netfilter: conntrack: do not renew entry stuck in tcp SYN_SENT state [ Upstream commit e15d4cdf27cb0c1e977270270b2cea12e0955edd ] Consider: client -----> conntrack ---> Host client sends a SYN, but $Host is unreachable/silent. Client eventually gives up and the conntrack entry will time out. However, if the client is restarted with same addr/port pair, it may prevent the conntrack entry from timing out. This is noticeable when the existing conntrack entry has no NAT transformation or an outdated one and port reuse happens either on client or due to a NAT middlebox. This change prevents refresh of the timeout for SYN retransmits, so entry is going away after nf_conntrack_tcp_timeout_syn_sent seconds (default: 60). Entry will be re-created on next connection attempt, but then nat rules will be evaluated again. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3f785bdfa942..c1d02c0b4f00 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1158,6 +1158,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + + if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) { + /* do not renew timeout on SYN retransmit. + * + * Else port reuse by client or NAT middlebox can keep + * entry alive indefinitely (including nat info). + */ + return NF_ACCEPT; + } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection * pickup with loose=1. Avoid large ESTABLISHED timeout. */ From 1f6768143bf76cdad9ec83b25929bab061df48ec Mon Sep 17 00:00:00 2001 From: Deepak Sharma Date: Thu, 23 Sep 2021 23:12:05 -0700 Subject: [PATCH 077/143] x86: ACPI: cstate: Optimize C3 entry on AMD CPUs commit a8fb40966f19ff81520d9ccf8f7e2b95201368b8 upstream. All Zen or newer CPU which support C3 shares cache. Its not necessary to flush the caches in software before entering C3. This will cause drop in performance for the cores which share some caches. ARB_DIS is not used with current AMD C state implementation. So set related flags correctly. Signed-off-by: Deepak Sharma Acked-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Signed-off-by: Guilherme G. Piccoli Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/cstate.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 49ae4e1ac9cd..d28d43d774a2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -79,6 +79,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, */ flags->bm_control = 0; } + if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17) { + /* + * For all AMD Zen or newer CPUs that support C3, caches + * should not be flushed by software while entering C3 + * type state. Set bm->check to 1 so that kernel doesn't + * need to execute cache flush operation. + */ + flags->bm_check = 1; + /* + * In current AMD C state implementation ARB_DIS is no longer + * used. So set bm_control to zero to indicate ARB_DIS is not + * required while entering C3 type state. + */ + flags->bm_control = 0; + } } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); From c4097e844a6e1368b7de1fded5713e7c52c07237 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 27 Oct 2021 22:34:41 +0800 Subject: [PATCH 078/143] fs: reiserfs: remove useless new_opts in reiserfs_remount commit 81dedaf10c20959bdf5624f9783f408df26ba7a4 upstream. Since the commit c3d98ea08291 ("VFS: Don't use save/replace_mount_options if not using generic_show_options") eliminates replace_mount_options in reiserfs_remount, but does not handle the allocated new_opts, it will cause memory leak in the reiserfs_remount. Because new_opts is useless in reiserfs_mount, so we fix this bug by removing the useless new_opts in reiserfs_remount. Fixes: c3d98ea08291 ("VFS: Don't use save/replace_mount_options if not using generic_show_options") Link: https://lore.kernel.org/r/20211027143445.4156459-1-mudongliangabcd@gmail.com Signed-off-by: Dongliang Mu Signed-off-by: Jan Kara Signed-off-by: Fedor Pchelkin Signed-off-by: Greg Kroah-Hartman --- fs/reiserfs/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 913f5af9bf24..0ebb6e684908 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1437,7 +1437,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1445,10 +1444,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif - new_opts = kstrdup(arg, GFP_KERNEL); - if (arg && !new_opts) - return -ENOMEM; - sync_filesystem(s); reiserfs_write_lock(s); @@ -1599,7 +1594,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) out_err_unlock: reiserfs_write_unlock(s); out_err: - kfree(new_opts); return err; } From e6226917f4cf7f37f8b256b9edf4669a8d383009 Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Tue, 24 Jan 2023 11:29:45 -0800 Subject: [PATCH 079/143] sysctl: add a new register_sysctl_init() interface commit 3ddd9a808cee7284931312f2f3e854c9617f44b2 upstream. Patch series "sysctl: first set of kernel/sysctl cleanups", v2. Finally had time to respin the series of the work we had started last year on cleaning up the kernel/sysct.c kitchen sink. People keeps stuffing their sysctls in that file and this creates a maintenance burden. So this effort is aimed at placing sysctls where they actually belong. I'm going to split patches up into series as there is quite a bit of work. This first set adds register_sysctl_init() for uses of registerting a sysctl on the init path, adds const where missing to a few places, generalizes common values so to be more easy to share, and starts the move of a few kernel/sysctl.c out where they belong. The majority of rework on v2 in this first patch set is 0-day fixes. Eric Biederman's feedback is later addressed in subsequent patch sets. I'll only post the first two patch sets for now. We can address the rest once the first two patch sets get completely reviewed / Acked. This patch (of 9): The kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. Today though folks heavily rely on tables on kernel/sysctl.c so they can easily just extend this table with their needed sysctls. In order to help users move their sysctls out we need to provide a helper which can be used during code initialization. We special-case the initialization use of register_sysctl() since it *is* safe to fail, given all that sysctls do is provide a dynamic interface to query or modify at runtime an existing variable. So the use case of register_sysctl() on init should *not* stop if the sysctls don't end up getting registered. It would be counter productive to stop boot if a simple sysctl registration failed. Provide a helper for init then, and document the recommended init levels to use for callers of this routine. We will later use this in subsequent patches to start slimming down kernel/sysctl.c tables and moving sysctl registration to the code which actually needs these sysctls. [mcgrof@kernel.org: major commit log and documentation rephrasing also moved to fs/proc/proc_sysctl.c ] Link: https://lkml.kernel.org/r/20211123202347.818157-1-mcgrof@kernel.org Link: https://lkml.kernel.org/r/20211123202347.818157-2-mcgrof@kernel.org Signed-off-by: Xiaoming Ni Signed-off-by: Luis Chamberlain Reviewed-by: Kees Cook Cc: Iurii Zaikin Cc: "Eric W. Biederman" Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: Paul Turner Cc: Andy Shevchenko Cc: Sebastian Reichel Cc: Tetsuo Handa Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Qing Wang Cc: Benjamin LaHaise Cc: Al Viro Cc: Jan Kara Cc: Amir Goldstein Cc: Stephen Kitt Cc: Antti Palosaari Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Clemens Ladisch Cc: David Airlie Cc: Jani Nikula Cc: Joel Becker Cc: Joonas Lahtinen Cc: Joseph Qi Cc: Julia Lawall Cc: Lukas Middendorf Cc: Mark Fasheh Cc: Phillip Potter Cc: Rodrigo Vivi Cc: Douglas Gilbert Cc: James E.J. Bottomley Cc: Jani Nikula Cc: John Ogness Cc: Martin K. Petersen Cc: "Rafael J. Wysocki" Cc: Steven Rostedt (VMware) Cc: Suren Baghdasaryan Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- fs/proc/proc_sysctl.c | 33 +++++++++++++++++++++++++++++++++ include/linux/sysctl.h | 3 +++ 2 files changed, 36 insertions(+) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 070d2df8ab9c..cd7c6c4af83a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1380,6 +1381,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just loose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init(). + */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 51298a4f4623..161eba9fd912 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -195,6 +195,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) void do_sysctl_args(void); extern int pwrsw_enabled; From e97ec099d7fd2d963414bb9f5a0e895603d79b7b Mon Sep 17 00:00:00 2001 From: tangmeng Date: Tue, 24 Jan 2023 11:29:46 -0800 Subject: [PATCH 080/143] kernel/panic: move panic sysctls to its own file commit 9df918698408fd914493aba0b7858fef50eba63a upstream. kernel/sysctl.c is a kitchen sink where everyone leaves their dirty dishes, this makes it very difficult to maintain. To help with this maintenance let's start by moving sysctls to places where they actually belong. The proc sysctl maintainers do not want to know what sysctl knobs you wish to add for your own piece of code, we just care about the core logic. All filesystem syctls now get reviewed by fs folks. This commit follows the commit of fs, move the oops_all_cpu_backtrace sysctl to its own file, kernel/panic.c. Signed-off-by: tangmeng Signed-off-by: Luis Chamberlain Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- include/linux/kernel.h | 6 ------ kernel/panic.c | 26 +++++++++++++++++++++++++- kernel/sysctl.c | 11 ----------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f5392d96d688..084d97070ed9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -520,12 +520,6 @@ static inline u32 int_sqrt64(u64 x) } #endif -#ifdef CONFIG_SMP -extern unsigned int sysctl_oops_all_cpu_backtrace; -#else -#define sysctl_oops_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - extern void bust_spinlocks(int yes); extern int panic_timeout; extern unsigned long panic_print; diff --git a/kernel/panic.c b/kernel/panic.c index 332736a72a58..f567195d45d9 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -41,7 +41,9 @@ * Should we dump all CPUs backtraces in an oops event? * Defaults to 0, can be changed via sysctl. */ -unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace; +#else +#define sysctl_oops_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE; @@ -70,6 +72,28 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); +#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +static struct ctl_table kern_panic_table[] = { + { + .procname = "oops_all_cpu_backtrace", + .data = &sysctl_oops_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { } +}; + +static __init int kernel_panic_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_panic_table); + return 0; +} +late_initcall(kernel_panic_sysctls_init); +#endif + static long no_blink(int state) { return 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3eb527f8a269..d8b7b2846313 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2199,17 +2199,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_SMP - { - .procname = "oops_all_cpu_backtrace", - .data = &sysctl_oops_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ { .procname = "pid_max", .data = &pid_max, From 590ba6fee0956127b7ec62af3a658ed71d175f83 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Jan 2023 11:29:47 -0800 Subject: [PATCH 081/143] panic: unset panic_on_warn inside panic() commit 1a2383e8b84c0451fd9b1eec3b9aab16f30b597c upstream. In the current code, the following three places need to unset panic_on_warn before calling panic() to avoid recursive panics: kernel/kcsan/report.c: print_report() kernel/sched/core.c: __schedule_bug() mm/kfence/report.c: kfence_report_error() In order to avoid copy-pasting "panic_on_warn = 0" all over the places, it is better to move it inside panic() and then remove it from the other places. Link: https://lkml.kernel.org/r/1644324666-15947-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- kernel/panic.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index f567195d45d9..960c2be2759c 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -207,6 +207,16 @@ void panic(const char *fmt, ...) int old_cpu, this_cpu; bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + if (panic_on_warn) { + /* + * This thread may hit another WARN() in the panic path. + * Resetting this prevents additional WARN() from panicking the + * system on this thread. Other threads are blocked by the + * panic_mutex in panic(). + */ + panic_on_warn = 0; + } + /* * Disable local interrupts. This will prevent panic_smp_self_stop * from deadlocking the first cpu that invokes the panic, since @@ -618,16 +628,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (regs) show_regs(regs); - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn) panic("panic_on_warn set ...\n"); - } if (!regs) dump_stack(); From b857b42a8c0992256d3129bce141724e4eb2b438 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Jan 2023 11:29:48 -0800 Subject: [PATCH 082/143] ubsan: no need to unset panic_on_warn in ubsan_epilogue() commit d83ce027a54068fabb70d2c252e1ce2da86784a4 upstream. panic_on_warn is unset inside panic(), so no need to unset it before calling panic() in ubsan_epilogue(). Link: https://lkml.kernel.org/r/1644324666-15947-5-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- lib/ubsan.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index adf8dcf3c84e..d81d107f64f4 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -151,16 +151,8 @@ static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn) panic("panic_on_warn set ...\n"); - } } static void handle_overflow(struct overflow_data *data, void *lhs, From 715a63588f5602eae9ee7699ad83863cb500a019 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Tue, 24 Jan 2023 11:29:49 -0800 Subject: [PATCH 083/143] kasan: no need to unset panic_on_warn in end_report() commit e7ce7500375a63348e1d3a703c8d5003cbe3fea6 upstream. panic_on_warn is unset inside panic(), so no need to unset it before calling panic() in end_report(). Link: https://lkml.kernel.org/r/1644324666-15947-6-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Tiezhu Yang Reviewed-by: Marco Elver Cc: Andrey Ryabinin Cc: Baoquan He Cc: Jonathan Corbet Cc: Xuefeng Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- mm/kasan/report.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 00a53f1355ae..91714acea0d6 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -95,16 +95,8 @@ static void end_report(unsigned long *flags) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) { - /* - * This thread may hit another WARN() in the panic path. - * Resetting this prevents additional WARN() from panicking the - * system on this thread. Other threads are blocked by the - * panic_mutex in panic(). - */ - panic_on_warn = 0; + if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) panic("panic_on_warn set ...\n"); - } kasan_enable_current(); } From d9c740c765e5b5d27bc6e7a500430a9bd4e14e75 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 24 Jan 2023 11:29:50 -0800 Subject: [PATCH 084/143] exit: Add and use make_task_dead. commit 0e25498f8cd43c1b5aa327f373dd094e9a006da7 upstream. There are two big uses of do_exit. The first is it's design use to be the guts of the exit(2) system call. The second use is to terminate a task after something catastrophic has happened like a NULL pointer in kernel code. Add a function make_task_dead that is initialy exactly the same as do_exit to cover the cases where do_exit is called to handle catastrophic failure. In time this can probably be reduced to just a light wrapper around do_task_dead. For now keep it exactly the same so that there will be no behavioral differences introducing this new concept. Replace all of the uses of do_exit that use it for catastraphic task cleanup with make_task_dead to make it clear what the code is doing. As part of this rename rewind_stack_do_exit rewind_stack_and_make_dead. Signed-off-by: "Eric W. Biederman" Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- arch/alpha/kernel/traps.c | 6 +++--- arch/alpha/mm/fault.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/csky/abiv1/alignment.c | 2 +- arch/csky/kernel/traps.c | 2 +- arch/h8300/kernel/traps.c | 2 +- arch/h8300/mm/fault.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/ia64/kernel/mca_drv.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/m68k/kernel/traps.c | 2 +- arch/m68k/mm/fault.c | 2 +- arch/microblaze/kernel/exceptions.c | 4 ++-- arch/mips/kernel/traps.c | 2 +- arch/nds32/kernel/fpu.c | 2 +- arch/nds32/kernel/traps.c | 8 ++++---- arch/nios2/kernel/traps.c | 4 ++-- arch/openrisc/kernel/traps.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/powerpc/kernel/traps.c | 2 +- arch/riscv/kernel/traps.c | 2 +- arch/riscv/mm/fault.c | 2 +- arch/s390/kernel/dumpstack.c | 2 +- arch/s390/kernel/nmi.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/sparc/kernel/traps_32.c | 4 +--- arch/sparc/kernel/traps_64.c | 4 +--- arch/x86/entry/entry_32.S | 6 +++--- arch/x86/entry/entry_64.S | 6 +++--- arch/x86/kernel/dumpstack.c | 4 ++-- arch/xtensa/kernel/traps.c | 2 +- include/linux/sched/task.h | 1 + kernel/exit.c | 9 +++++++++ tools/objtool/check.c | 3 ++- 38 files changed, 59 insertions(+), 52 deletions(-) diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 921d4b6e4d95..8b0f81a58b94 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -192,7 +192,7 @@ die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) local_irq_enable(); while (1); } - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } #ifndef CONFIG_MATHEMU @@ -577,7 +577,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", pc, va, opcode, reg); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); got_exception: /* Ok, we caught the exception, but we don't want it. Is there @@ -632,7 +632,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, local_irq_enable(); while (1); } - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 09172f017efc..5d42f94887da 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -204,7 +204,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, printk(KERN_ALERT "Unable to handle kernel paging request at " "virtual address %016lx\n", address); die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16); - do_exit(SIGKILL); + make_task_dead(SIGKILL); /* We ran out of memory, or some other thing happened to us that made us unable to handle the page fault gracefully. */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a531afad87fd..7878c33e188d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -348,7 +348,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) if (panic_on_oops) panic("Fatal exception"); if (signr) - do_exit(signr); + make_task_dead(signr); } /* diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index efa402025031..af5177801fb1 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -125,7 +125,7 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, show_pte(KERN_ALERT, mm, addr); die("Oops", regs, fsr); bust_spinlocks(0); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 2059d8f43f55..2cdd53425509 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -144,7 +144,7 @@ void die(const char *str, struct pt_regs *regs, int err) raw_spin_unlock_irqrestore(&die_lock, flags); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } static void arm64_show_signal(int signo, const char *str) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 795d224f184f..2be856731e81 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,7 +293,7 @@ static void die_kernel_fault(const char *msg, unsigned long addr, show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } static void __do_kernel_fault(unsigned long addr, unsigned int esr, diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index cb2a0d94a144..5e2fb45d605c 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -294,7 +294,7 @@ void csky_alignment(struct pt_regs *regs) __func__, opcode, rz, rx, imm, addr); show_regs(regs); bust_spinlocks(0); - do_exit(SIGKILL); + make_dead_task(SIGKILL); } force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index 22721468a04b..3c648305f2c3 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -111,7 +111,7 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception"); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_dead_task(SIGSEGV); } void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 5d8b969cd8f3..2b1366c958e3 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -110,7 +110,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) dump(fp); spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); + make_dead_task(SIGSEGV); } static int kstack_depth_to_print = 24; diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c index d4bc9c16f2df..0223528565dd 100644 --- a/arch/h8300/mm/fault.c +++ b/arch/h8300/mm/fault.c @@ -51,7 +51,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address, printk(" at virtual address %08lx\n", address); if (!user_mode(regs)) die("Oops", regs, error_code); - do_exit(SIGKILL); + make_dead_task(SIGKILL); return 1; } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 904134b37232..25e8bdbfd685 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -218,7 +218,7 @@ int die(const char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - do_exit(err); + make_dead_task(err); return 0; } diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 2a40268c3d49..d9ee3b186249 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -176,7 +176,7 @@ mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) spin_unlock(&mca_bh_lock); /* This process is about to be killed itself */ - do_exit(SIGKILL); + make_task_dead(SIGKILL); } /** diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index e13cb905930f..753642366e12 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -85,7 +85,7 @@ die (const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); return 0; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index cd9766d2b6e0..829198180ca6 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -274,7 +274,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re regs = NULL; bust_spinlocks(0); if (regs) - do_exit(SIGKILL); + make_task_dead(SIGKILL); return; out_of_memory: diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 9e1261462bcc..b2a31afb998c 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1136,7 +1136,7 @@ void die_if_kernel (char *str, struct pt_regs *fp, int nr) pr_crit("%s: %08x\n", str, nr); show_registers(fp); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } asmlinkage void set_esp0(unsigned long ssp) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index ef46e77e97a5..fcb3a0d8421c 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -48,7 +48,7 @@ int send_fault_sig(struct pt_regs *regs) pr_alert("Unable to handle kernel access"); pr_cont(" at virtual address %p\n", addr); die_if_kernel("Oops", regs, 0 /*error_code*/); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } return 1; diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index cf99c411503e..6d3a6a644220 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -44,10 +44,10 @@ void die(const char *str, struct pt_regs *fp, long err) pr_warn("Oops: %s, sig: %ld\n", str, err); show_regs(fp); spin_unlock_irq(&die_lock); - /* do_exit() should take care of panic'ing from an interrupt + /* make_task_dead() should take care of panic'ing from an interrupt * context so we don't handle it here */ - do_exit(err); + make_task_dead(err); } /* for user application debugging */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b1fe4518bd22..ebd0101f0958 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -413,7 +413,7 @@ void __noreturn die(const char *str, struct pt_regs *regs) if (regs && kexec_should_crash(current)) crash_kexec(regs); - do_exit(sig); + make_task_dead(sig); } extern struct exception_table_entry __start___dbe_table[]; diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c index 9edd7ed7d7bf..701c09a668de 100644 --- a/arch/nds32/kernel/fpu.c +++ b/arch/nds32/kernel/fpu.c @@ -223,7 +223,7 @@ inline void handle_fpu_exception(struct pt_regs *regs) } } else if (fpcsr & FPCSR_mskRIT) { if (!user_mode(regs)) - do_exit(SIGILL); + make_task_dead(SIGILL); si_signo = SIGILL; } diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 6a9772ba7392..12cdd6549360 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -185,7 +185,7 @@ void die(const char *str, struct pt_regs *regs, int err) bust_spinlocks(0); spin_unlock_irq(&die_lock); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } EXPORT_SYMBOL(die); @@ -289,7 +289,7 @@ void unhandled_interruption(struct pt_regs *regs) pr_emerg("unhandled_interruption\n"); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGKILL); + make_task_dead(SIGKILL); force_sig(SIGKILL); } @@ -300,7 +300,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr, addr, type); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGKILL); + make_task_dead(SIGKILL); force_sig(SIGKILL); } @@ -327,7 +327,7 @@ void do_revinsn(struct pt_regs *regs) pr_emerg("Reserved Instruction\n"); show_regs(regs); if (!user_mode(regs)) - do_exit(SIGILL); + make_task_dead(SIGILL); force_sig(SIGILL); } diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index b172da4eb1a9..86208178024f 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -37,10 +37,10 @@ void die(const char *str, struct pt_regs *regs, long err) show_regs(regs); spin_unlock_irq(&die_lock); /* - * do_exit() should take care of panic'ing from an interrupt + * make_task_dead() should take care of panic'ing from an interrupt * context so we don't handle it here */ - do_exit(err); + make_task_dead(err); } void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 206e5325e61b..fca5317f3ce1 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -212,7 +212,7 @@ void die(const char *str, struct pt_regs *regs, long err) __asm__ __volatile__("l.nop 1"); do {} while (1); #endif - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* This is normally the 'Oops' routine */ diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index bce47e0fb692..2fad7867af10 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -268,7 +268,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } /* gdb uses break 4,8 */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 069d451240fa..5e5a2448ae79 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + make_task_dead(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index c1a13011fb8e..23fe03ca7ec7 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -57,7 +57,7 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception"); if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8f84bbe0ac33..54b12943cc7b 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -34,7 +34,7 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request", addr); die(regs, "Oops"); - do_exit(SIGKILL); + make_task_dead(SIGKILL); } static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 0dc4b258b98d..763e726025b3 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -214,5 +214,5 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception: panic_on_oops"); oops_exit(); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 86c8d5370e7f..0102376eca3d 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -178,7 +178,7 @@ void s390_handle_mcck(void) "malfunction (code 0x%016lx).\n", mcck.mcck_code); printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", current->comm, current->pid); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } } EXPORT_SYMBOL_GPL(s390_handle_mcck); diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 9c3d32b80038..4efffc18c851 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -57,7 +57,7 @@ void die(const char *str, struct pt_regs *regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(SIGSEGV); + make_task_dead(SIGSEGV); } void die_if_kernel(const char *str, struct pt_regs *regs, long err) diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 247a0d9683b2..5d47f4a34226 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -86,9 +86,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) } printk("Instruction DUMP:"); instruction_dump ((unsigned long *) regs->pc); - if(regs->psr & PSR_PS) - do_exit(SIGKILL); - do_exit(SIGSEGV); + make_task_dead((regs->psr & PSR_PS) ? SIGKILL : SIGSEGV); } void do_hw_interrupt(struct pt_regs *regs, unsigned long type) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index a850dccd78ea..814277d0e3e8 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2564,9 +2564,7 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) } if (panic_on_oops) panic("Fatal exception"); - if (regs->tstate & TSTATE_PRIV) - do_exit(SIGKILL); - do_exit(SIGSEGV); + make_task_dead((regs->tstate & TSTATE_PRIV)? SIGKILL : SIGSEGV); } EXPORT_SYMBOL(die_if_kernel); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 8fcd6a42b3a1..70bd81b6c612 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1333,14 +1333,14 @@ SYM_CODE_START(asm_exc_nmi) SYM_CODE_END(asm_exc_nmi) .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp movl PER_CPU_VAR(cpu_current_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp - call do_exit + call make_task_dead 1: jmp 1b -SYM_CODE_END(rewind_stack_do_exit) +SYM_CODE_END(rewind_stack_and_make_dead) .popsection diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 559c82b83475..23212c53cef7 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1509,7 +1509,7 @@ SYM_CODE_END(ignore_sysret) #endif .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_do_exit) +SYM_CODE_START(rewind_stack_and_make_dead) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp @@ -1518,6 +1518,6 @@ SYM_CODE_START(rewind_stack_do_exit) leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS - call do_exit -SYM_CODE_END(rewind_stack_do_exit) + call make_task_dead +SYM_CODE_END(rewind_stack_and_make_dead) .popsection diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 97aa900386cb..b4964300153a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -351,7 +351,7 @@ unsigned long oops_begin(void) } NOKPROBE_SYMBOL(oops_begin); -void __noreturn rewind_stack_do_exit(int signr); +void __noreturn rewind_stack_and_make_dead(int signr); void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { @@ -386,7 +386,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) * reuse the task stack and that existing poisons are invalid. */ kasan_unpoison_task_stack(current); - rewind_stack_do_exit(signr); + rewind_stack_and_make_dead(signr); } NOKPROBE_SYMBOL(oops_end); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index efc3a29cde80..129f23c0ab55 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -545,5 +545,5 @@ void die(const char * str, struct pt_regs * regs, long err) if (panic_on_oops) panic("Fatal exception"); - do_exit(err); + make_task_dead(err); } diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 4ce511437a8a..2832cc6be062 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -61,6 +61,7 @@ extern void sched_post_fork(struct task_struct *p, extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); +void __noreturn make_task_dead(int signr); extern void proc_caches_init(void); diff --git a/kernel/exit.c b/kernel/exit.c index 8989e1d1f79b..8d7577940077 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -863,6 +863,15 @@ void __noreturn do_exit(long code) } EXPORT_SYMBOL_GPL(do_exit); +void __noreturn make_task_dead(int signr) +{ + /* + * Take the task off the cpu after something catastrophic has + * happened. + */ + do_exit(signr); +} + void complete_and_exit(struct completion *comp, long code) { if (comp) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 700984e7f5ba..3c2baeb86c57 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -168,6 +168,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", + "make_task_dead", "__module_put_and_exit", "complete_and_exit", "__reiserfs_panic", @@ -175,7 +176,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_do_exit", + "rewind_stack_and_make_dead" "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", From b2c178f31162abe0c1123b815b50059f2d1163dd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 24 Jan 2023 11:29:51 -0800 Subject: [PATCH 085/143] objtool: Add a missing comma to avoid string concatenation commit 1fb466dff904e4a72282af336f2c355f011eec61 upstream. Recently the kbuild robot reported two new errors: >> lib/kunit/kunit-example-test.o: warning: objtool: .text.unlikely: unexpected end of section >> arch/x86/kernel/dumpstack.o: warning: objtool: oops_end() falls through to next function show_opcodes() I don't know why they did not occur in my test setup but after digging it I realized I had accidentally dropped a comma in tools/objtool/check.c when I renamed rewind_stack_do_exit to rewind_stack_and_make_dead. Add that comma back to fix objtool errors. Link: https://lkml.kernel.org/r/202112140949.Uq5sFKR1-lkp@intel.com Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Reported-by: kernel test robot Signed-off-by: "Eric W. Biederman" Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3c2baeb86c57..985bcc5cea8a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -176,7 +176,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_and_make_dead" + "rewind_stack_and_make_dead", "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", From 63d77c559678f9cff5f689728c8c5cf4af7563ca Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Jan 2023 11:29:52 -0800 Subject: [PATCH 086/143] hexagon: Fix function name in die() commit 4f0712ccec09c071e221242a2db9a6779a55a949 upstream. When building ARCH=hexagon defconfig: arch/hexagon/kernel/traps.c:217:2: error: implicit declaration of function 'make_dead_task' [-Werror,-Wimplicit-function-declaration] make_dead_task(err); ^ The function's name is make_task_dead(), change it so there is no more build error. Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Nathan Chancellor Link: https://lkml.kernel.org/r/20211227184851.2297759-2-nathan@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- arch/hexagon/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 25e8bdbfd685..b334e8071709 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -218,7 +218,7 @@ int die(const char *str, struct pt_regs *regs, long err) panic("Fatal exception"); oops_exit(); - make_dead_task(err); + make_task_dead(err); return 0; } From 648d8b8c4938c88fef0747dee1166dbaf4e97c4c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Jan 2023 11:29:53 -0800 Subject: [PATCH 087/143] h8300: Fix build errors from do_exit() to make_task_dead() transition commit ab4ababdf77ccc56c7301c751dff49c79709c51c upstream. When building ARCH=h8300 defconfig: arch/h8300/kernel/traps.c: In function 'die': arch/h8300/kernel/traps.c:109:2: error: implicit declaration of function 'make_dead_task' [-Werror=implicit-function-declaration] 109 | make_dead_task(SIGSEGV); | ^~~~~~~~~~~~~~ arch/h8300/mm/fault.c: In function 'do_page_fault': arch/h8300/mm/fault.c:54:2: error: implicit declaration of function 'make_dead_task' [-Werror=implicit-function-declaration] 54 | make_dead_task(SIGKILL); | ^~~~~~~~~~~~~~ The function's name is make_task_dead(), change it so there is no more build error. Additionally, include linux/sched/task.h in arch/h8300/kernel/traps.c to avoid the same error because do_exit()'s declaration is in kernel.h but make_task_dead()'s is in task.h, which is not included in traps.c. Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Nathan Chancellor Link: https://lkml.kernel.org/r/20211227184851.2297759-3-nathan@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- arch/h8300/kernel/traps.c | 3 ++- arch/h8300/mm/fault.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 2b1366c958e3..cf23ccb50c17 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -110,7 +111,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) dump(fp); spin_unlock_irq(&die_lock); - make_dead_task(SIGSEGV); + make_task_dead(SIGSEGV); } static int kstack_depth_to_print = 24; diff --git a/arch/h8300/mm/fault.c b/arch/h8300/mm/fault.c index 0223528565dd..b465441f490d 100644 --- a/arch/h8300/mm/fault.c +++ b/arch/h8300/mm/fault.c @@ -51,7 +51,7 @@ asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address, printk(" at virtual address %08lx\n", address); if (!user_mode(regs)) die("Oops", regs, error_code); - make_dead_task(SIGKILL); + make_task_dead(SIGKILL); return 1; } From 6d971830da31148070a16577371d73fff5c16a57 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Jan 2023 11:29:54 -0800 Subject: [PATCH 088/143] csky: Fix function name in csky_alignment() and die() commit 751971af2e3615dc5bd12674080bc795505fefeb upstream. When building ARCH=csky defconfig: arch/csky/kernel/traps.c: In function 'die': arch/csky/kernel/traps.c:112:17: error: implicit declaration of function 'make_dead_task' [-Werror=implicit-function-declaration] 112 | make_dead_task(SIGSEGV); | ^~~~~~~~~~~~~~ The function's name is make_task_dead(), change it so there is no more build error. Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Nathan Chancellor Reviewed-by: Guo Ren Link: https://lkml.kernel.org/r/20211227184851.2297759-4-nathan@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- arch/csky/abiv1/alignment.c | 2 +- arch/csky/kernel/traps.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index 5e2fb45d605c..2df115d0e210 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -294,7 +294,7 @@ void csky_alignment(struct pt_regs *regs) __func__, opcode, rz, rx, imm, addr); show_regs(regs); bust_spinlocks(0); - make_dead_task(SIGKILL); + make_task_dead(SIGKILL); } force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index 3c648305f2c3..15711efa14a4 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -111,7 +111,7 @@ void die(struct pt_regs *regs, const char *str) if (panic_on_oops) panic("Fatal exception"); if (ret != NOTIFY_STOP) - make_dead_task(SIGSEGV); + make_task_dead(SIGSEGV); } void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) From 1b9a33a94b9c686616c3dc14f795488924ba39cd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 24 Jan 2023 11:29:55 -0800 Subject: [PATCH 089/143] ia64: make IA64_MCA_RECOVERY bool instead of tristate commit dbecf9b8b8ce580f4e11afed9d61e8aa294cddd2 upstream. In linux-next, IA64_MCA_RECOVERY uses the (new) function make_task_dead(), which is not exported for use by modules. Instead of exporting it for one user, convert IA64_MCA_RECOVERY to be a bool Kconfig symbol. In a config file from "kernel test robot " for a different problem, this linker error was exposed when CONFIG_IA64_MCA_RECOVERY=m. Fixes this build error: ERROR: modpost: "make_task_dead" [arch/ia64/kernel/mca_recovery.ko] undefined! Link: https://lkml.kernel.org/r/20220124213129.29306-1-rdunlap@infradead.org Fixes: 0e25498f8cd4 ("exit: Add and use make_task_dead.") Signed-off-by: Randy Dunlap Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: "Eric W. Biederman" Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- arch/ia64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 39b25a5a591b..1d0579bc9d65 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -361,7 +361,7 @@ config ARCH_PROC_KCORE_TEXT depends on PROC_KCORE config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." + bool "MCA recovery from errors other than TLB." config IA64_PALINFO tristate "/proc/pal support" From 191a3b17dd9b55dc92283935247f8a854e6fffba Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:29:56 -0800 Subject: [PATCH 090/143] panic: Separate sysctl logic from CONFIG_SMP commit 9360d035a579d95d1e76c471061b9065b18a0eb1 upstream. In preparation for adding more sysctls directly in kernel/panic.c, split CONFIG_SMP from the logic that adds sysctls. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-1-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- kernel/panic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 960c2be2759c..09f0802212c3 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -72,8 +72,9 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); -#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +#ifdef CONFIG_SYSCTL static struct ctl_table kern_panic_table[] = { +#ifdef CONFIG_SMP { .procname = "oops_all_cpu_backtrace", .data = &sysctl_oops_all_cpu_backtrace, @@ -83,6 +84,7 @@ static struct ctl_table kern_panic_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif { } }; From de586785b981d24ccc432becd1cdb55c81bb09fa Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 24 Jan 2023 11:29:57 -0800 Subject: [PATCH 091/143] exit: Put an upper limit on how often we can oops commit d4ccd54d28d3c8598e2354acc13e28c060961dbb upstream. Many Linux systems are configured to not panic on oops; but allowing an attacker to oops the system **really** often can make even bugs that look completely unexploitable exploitable (like NULL dereferences and such) if each crash elevates a refcount by one or a lock is taken in read mode, and this causes a counter to eventually overflow. The most interesting counters for this are 32 bits wide (like open-coded refcounts that don't use refcount_t). (The ldsem reader count on 32-bit platforms is just 16 bits, but probably nobody cares about 32-bit platforms that much nowadays.) So let's panic the system if the kernel is constantly oopsing. The speed of oopsing 2^32 times probably depends on several factors, like how long the stack trace is and which unwinder you're using; an empirically important one is whether your console is showing a graphical environment or a text console that oopses will be printed to. In a quick single-threaded benchmark, it looks like oopsing in a vfork() child with a very short stack trace only takes ~510 microseconds per run when a graphical console is active; but switching to a text console that oopses are printed to slows it down around 87x, to ~45 milliseconds per run. (Adding more threads makes this faster, but the actual oops printing happens under &die_lock on x86, so you can maybe speed this up by a factor of around 2 and then any further improvement gets eaten up by lock contention.) It looks like it would take around 8-12 days to overflow a 32-bit counter with repeated oopsing on a multi-core X86 system running a graphical environment; both me (in an X86 VM) and Seth (with a distro kernel on normal hardware in a standard configuration) got numbers in that ballpark. 12 days aren't *that* short on a desktop system, and you'd likely need much longer on a typical server system (assuming that people don't run graphical desktop environments on their servers), and this is a *very* noisy and violent approach to exploiting the kernel; and it also seems to take orders of magnitude longer on some machines, probably because stuff like EFI pstore will slow it down a ton if that's active. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/kernel.rst | 8 ++++ kernel/exit.c | 43 +++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index a4b1ebc2e70b..cd9247b48fc7 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -663,6 +663,14 @@ This is the default behavior. an oops event is detected. +oops_limit +========== + +Number of kernel oopses after which the kernel should panic when +``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect +as setting ``panic_on_oops=1``. + + osrelease, ostype & version =========================== diff --git a/kernel/exit.c b/kernel/exit.c index 8d7577940077..db832cff6b7b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -69,6 +69,33 @@ #include #include +/* + * The default value should be high enough to not crash a system that randomly + * crashes its kernel from time to time, but low enough to at least not permit + * overflowing 32-bit refcounts or the ldsem writer count. + */ +static unsigned int oops_limit = 10000; + +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_exit_table[] = { + { + .procname = "oops_limit", + .data = &oops_limit, + .maxlen = sizeof(oops_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_exit_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_exit_table); + return 0; +} +late_initcall(kernel_exit_sysctls_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -865,10 +892,26 @@ EXPORT_SYMBOL_GPL(do_exit); void __noreturn make_task_dead(int signr) { + static atomic_t oops_count = ATOMIC_INIT(0); + /* * Take the task off the cpu after something catastrophic has * happened. */ + + /* + * Every time the system oopses, if the oops happens while a reference + * to an object was held, the reference leaks. + * If the oops doesn't also leak memory, repeated oopsing can cause + * reference counters to wrap around (if they're not using refcount_t). + * This means that repeated oopsing can make unexploitable-looking bugs + * exploitable through repeated oopsing. + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + do_exit(signr); } From 7cffbcd68f1c1579173e6d4d6fdfd5c99a4f5ba5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:29:58 -0800 Subject: [PATCH 092/143] exit: Expose "oops_count" to sysfs commit 9db89b41117024f80b38b15954017fb293133364 upstream. Since Oops count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/oops_count to expose it to userspace. Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- .../ABI/testing/sysfs-kernel-oops_count | 6 +++++ kernel/exit.c | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count new file mode 100644 index 000000000000..156cca9dbc96 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-oops_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Oopsed since last boot. diff --git a/kernel/exit.c b/kernel/exit.c index db832cff6b7b..b519abee2c54 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -96,6 +97,25 @@ static __init int kernel_exit_sysctls_init(void) late_initcall(kernel_exit_sysctls_init); #endif +static atomic_t oops_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); +} + +static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); + +static __init int kernel_exit_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_exit_sysfs_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -892,8 +912,6 @@ EXPORT_SYMBOL_GPL(do_exit); void __noreturn make_task_dead(int signr) { - static atomic_t oops_count = ATOMIC_INIT(0); - /* * Take the task off the cpu after something catastrophic has * happened. From 530cdae5c2b2cc30bd09054edd75abf515564354 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:29:59 -0800 Subject: [PATCH 093/143] exit: Allow oops_limit to be disabled commit de92f65719cd672f4b48397540b9f9eff67eca40 upstream. In preparation for keeping oops_limit logic in sync with warn_limit, have oops_limit == 0 disable checking the Oops counter. Cc: Jann Horn Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: "Eric W. Biederman" Cc: Arnd Bergmann Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/kernel.rst | 5 +++-- kernel/exit.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index cd9247b48fc7..470262c08858 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -667,8 +667,9 @@ oops_limit ========== Number of kernel oopses after which the kernel should panic when -``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect -as setting ``panic_on_oops=1``. +``panic_on_oops`` is not set. Setting this to 0 disables checking +the count. Setting this to 1 has the same effect as setting +``panic_on_oops=1``. The default value is 10000. osrelease, ostype & version diff --git a/kernel/exit.c b/kernel/exit.c index b519abee2c54..8c820aa7b9c5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -927,7 +927,7 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); do_exit(signr); From 55eba18262cbc289ded9e7a8fe1b61a92140b59a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:30:00 -0800 Subject: [PATCH 094/143] panic: Consolidate open-coded panic_on_warn checks commit 79cc1ba7badf9e7a12af99695a557e9ce27ee967 upstream. Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll their own warnings, and each check "panic_on_warn". Consolidate this into a single function so that future instrumentation can be added in a single location. Cc: Marco Elver Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Vincenzo Frascino Cc: Andrew Morton Cc: David Gow Cc: tangmeng Cc: Jann Horn Cc: Shuah Khan Cc: Petr Mladek Cc: "Paul E. McKenney" Cc: Sebastian Andrzej Siewior Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- include/linux/kernel.h | 1 + kernel/kcsan/report.c | 4 ++-- kernel/panic.c | 9 +++++++-- kernel/sched/core.c | 3 +-- lib/ubsan.c | 3 +-- mm/kasan/report.c | 4 ++-- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 084d97070ed9..394f10fc29aa 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -320,6 +320,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); +void check_panic_on_warn(const char *origin); extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index d3bf87e6007c..069830f5a5d2 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -630,8 +630,8 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && print_report(value_change, type, &ai, other_info); - if (reported && panic_on_warn) - panic("panic_on_warn set ...\n"); + if (reported) + check_panic_on_warn("KCSAN"); release_report(&flags, other_info); } diff --git a/kernel/panic.c b/kernel/panic.c index 09f0802212c3..0da47888f72e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -192,6 +192,12 @@ static void panic_print_sys_info(void) ftrace_dump(DUMP_ALL); } +void check_panic_on_warn(const char *origin) +{ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); +} + /** * panic - halt the system * @fmt: The text string to print @@ -630,8 +636,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (regs) show_regs(regs); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("kernel"); if (!regs) dump_stack(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a875bc59804e..1303a2607f1f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4280,8 +4280,7 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_err("Preemption disabled at:"); print_ip_sym(KERN_ERR, preempt_disable_ip); } - if (panic_on_warn) - panic("scheduling while atomic\n"); + check_panic_on_warn("scheduling while atomic"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/lib/ubsan.c b/lib/ubsan.c index d81d107f64f4..ee14c46cac89 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -151,8 +151,7 @@ static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("UBSAN"); } static void handle_overflow(struct overflow_data *data, void *lhs, diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 91714acea0d6..2f5e96ac4d00 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -95,8 +95,8 @@ static void end_report(unsigned long *flags) pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) - panic("panic_on_warn set ...\n"); + if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + check_panic_on_warn("KASAN"); kasan_enable_current(); } From 8c99d4c4c192770e7ea622220e36f53b85152a75 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:30:01 -0800 Subject: [PATCH 095/143] panic: Introduce warn_limit commit 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 upstream. Like oops_limit, add warn_limit for limiting the number of warnings when panic_on_warn is not set. Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: Petr Mladek Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++ kernel/panic.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 470262c08858..6b0c7b650dea 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1478,6 +1478,16 @@ entry will default to 2 instead of 0. 2 Unprivileged calls to ``bpf()`` are disabled = ============================================================= + +warn_limit +========== + +Number of kernel warnings after which the kernel should panic when +``panic_on_warn`` is not set. Setting this to 0 disables checking +the warning count. Setting this to 1 has the same effect as setting +``panic_on_warn=1``. The default value is 0. + + watchdog ======== diff --git a/kernel/panic.c b/kernel/panic.c index 0da47888f72e..e341366bd3e8 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -56,6 +56,7 @@ bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; +static unsigned int warn_limit __read_mostly; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -85,6 +86,13 @@ static struct ctl_table kern_panic_table[] = { .extra2 = SYSCTL_ONE, }, #endif + { + .procname = "warn_limit", + .data = &warn_limit, + .maxlen = sizeof(warn_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, { } }; @@ -194,8 +202,14 @@ static void panic_print_sys_info(void) void check_panic_on_warn(const char *origin) { + static atomic_t warn_count = ATOMIC_INIT(0); + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); + + if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", + origin, warn_limit); } /** From b0bd5dcfa608cb6896d1a43baaf532dc80e0b657 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:30:02 -0800 Subject: [PATCH 096/143] panic: Expose "warn_count" to sysfs commit 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 upstream. Since Warn count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/warn_count to expose it to userspace. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- .../ABI/testing/sysfs-kernel-warn_count | 6 +++++ kernel/panic.c | 22 +++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count new file mode 100644 index 000000000000..08f083d2fd51 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Warned since last boot. diff --git a/kernel/panic.c b/kernel/panic.c index e341366bd3e8..6e30455eb2e7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #define PANIC_TIMER_STEP 100 @@ -104,6 +105,25 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +static atomic_t warn_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); +} + +static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); + +static __init int kernel_panic_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_panic_sysfs_init); +#endif + static long no_blink(int state) { return 0; @@ -202,8 +222,6 @@ static void panic_print_sys_info(void) void check_panic_on_warn(const char *origin) { - static atomic_t warn_count = ATOMIC_INIT(0); - if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); From 53f177b50449ab741599e402759d413f40b4073d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:30:03 -0800 Subject: [PATCH 097/143] docs: Fix path paste-o for /sys/kernel/warn_count commit 00dd027f721e0458418f7750d8a5a664ed3e5994 upstream. Running "make htmldocs" shows that "/sys/kernel/oops_count" was duplicated. This should have been "warn_count": Warning: /sys/kernel/oops_count is defined 2 times: ./Documentation/ABI/testing/sysfs-kernel-warn_count:0 ./Documentation/ABI/testing/sysfs-kernel-oops_count:0 Fix the typo. Reported-by: kernel test robot Link: https://lore.kernel.org/linux-doc/202212110529.A3Qav8aR-lkp@intel.com Fixes: 8b05aa263361 ("panic: Expose "warn_count" to sysfs") Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- Documentation/ABI/testing/sysfs-kernel-warn_count | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count index 08f083d2fd51..90a029813717 100644 --- a/Documentation/ABI/testing/sysfs-kernel-warn_count +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -1,4 +1,4 @@ -What: /sys/kernel/oops_count +What: /sys/kernel/warn_count Date: November 2022 KernelVersion: 6.2.0 Contact: Linux Kernel Hardening List From b98a8b731bd2a97a2b59d079d14b25472dd0acfd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Jan 2023 11:30:04 -0800 Subject: [PATCH 098/143] exit: Use READ_ONCE() for all oops/warn limit reads commit 7535b832c6399b5ebfc5b53af5c51dd915ee2538 upstream. Use a temporary variable to take full advantage of READ_ONCE() behavior. Without this, the report (and even the test) might be out of sync with the initial test. Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y5x7GXeluFmZ8E0E@hirez.programming.kicks-ass.net Fixes: 9fc9e278a5c0 ("panic: Introduce warn_limit") Fixes: d4ccd54d28d3 ("exit: Put an upper limit on how often we can oops") Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Cc: Petr Mladek Cc: Andrew Morton Cc: Luis Chamberlain Cc: Marco Elver Cc: tangmeng Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Signed-off-by: Kees Cook Signed-off-by: Eric Biggers Signed-off-by: Sasha Levin --- kernel/exit.c | 6 ++++-- kernel/panic.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 8c820aa7b9c5..bacdaf980933 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -916,6 +916,7 @@ void __noreturn make_task_dead(int signr) * Take the task off the cpu after something catastrophic has * happened. */ + unsigned int limit; /* * Every time the system oopses, if the oops happens while a reference @@ -927,8 +928,9 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) - panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + limit = READ_ONCE(oops_limit); + if (atomic_inc_return(&oops_count) >= limit && limit) + panic("Oopsed too often (kernel.oops_limit is %d)", limit); do_exit(signr); } diff --git a/kernel/panic.c b/kernel/panic.c index 6e30455eb2e7..bc39e2b27d31 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -222,12 +222,15 @@ static void panic_print_sys_info(void) void check_panic_on_warn(const char *origin) { + unsigned int limit; + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); - if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + limit = READ_ONCE(warn_limit); + if (atomic_inc_return(&warn_count) >= limit && limit) panic("%s: system warned too often (kernel.warn_limit is %d)", - origin, warn_limit); + origin, limit); } /** From 1d580d3e1311c5d28f0f28137cbcb999e9d446d8 Mon Sep 17 00:00:00 2001 From: Archie Pusaka Date: Thu, 26 Jan 2023 16:38:17 +0300 Subject: [PATCH 099/143] Bluetooth: hci_sync: cancel cmd_timer if hci_open failed commit 97dfaf073f5881c624856ef293be307b6166115c upstream. If a command is already sent, we take care of freeing it, but we also need to cancel the timeout as well. Signed-off-by: Archie Pusaka Reviewed-by: Abhishek Pandit-Subedi Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Fedor Pchelkin Signed-off-by: Sasha Levin --- net/bluetooth/hci_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2af1477a05ca..08c473aa0113 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1623,6 +1623,7 @@ static int hci_dev_do_open(struct hci_dev *hdev) hdev->flush(hdev); if (hdev->sent_cmd) { + cancel_delayed_work_sync(&hdev->cmd_timer); kfree_skb(hdev->sent_cmd); hdev->sent_cmd = NULL; } From 6da7055826a10b3c1de7c7cc22d509ab41310b8b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 10 May 2022 14:46:30 +0530 Subject: [PATCH 100/143] xhci: Set HCD flag to defer primary roothub registration commit b7a4f9b5d0e4b6dd937678c546c0b322dd1a4054 upstream. Set "HCD_FLAG_DEFER_RH_REGISTER" to hcd->flags in xhci_run() to defer registering primary roothub in usb_add_hcd() if xhci has two roothubs. This will make sure both primary roothub and secondary roothub will be registered along with the second HCD. This is required for cold plugged USB devices to be detected in certain PCIe USB cards (like Inateck USB card connected to AM64 EVM or J7200 EVM). This patch has been added and reverted earier as it triggered a race in usb device enumeration. That race is now fixed in 5.16-rc3, and in stable back to 5.4 commit 6cca13de26ee ("usb: hub: Fix locking issues with address0_mutex") commit 6ae6dc22d2d1 ("usb: hub: Fix usb enumeration issue due to address0 race") [minor rebase change, and commit message update -Mathias] CC: stable@vger.kernel.org # 5.4+ Suggested-by: Mathias Nyman Tested-by: Chris Chiu Signed-off-by: Kishon Vijay Abraham I Link: https://lore.kernel.org/r/20220510091630.16564-3-kishon@ti.com Signed-off-by: Adrian Zaharia Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2967372a9988..473b0b64dd57 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -696,6 +696,8 @@ int xhci_run(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); + set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags); + xhci_dbc_init(xhci); xhci_debugfs_init(xhci); From ce3aa7694627c35e761459d8dbd1b3b4ef0121ce Mon Sep 17 00:00:00 2001 From: "Alexey V. Vissarionov" Date: Wed, 18 Jan 2023 06:12:55 +0300 Subject: [PATCH 101/143] scsi: hpsa: Fix allocation size for scsi_host_alloc() [ Upstream commit bbbd25499100c810ceaf5193c3cfcab9f7402a33 ] The 'h' is a pointer to struct ctlr_info, so it's just 4 or 8 bytes, while the structure itself is much bigger. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: edd163687ea5 ("hpsa: add driver for HP Smart Array controllers.") Link: https://lore.kernel.org/r/20230118031255.GE15213@altlinux.org Signed-off-by: Alexey V. Vissarionov Acked-by: Don Brace Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index b2d4b6c78b5c..a44a098dbb9c 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5834,7 +5834,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) { struct Scsi_Host *sh; - sh = scsi_host_alloc(&hpsa_driver_template, sizeof(h)); + sh = scsi_host_alloc(&hpsa_driver_template, sizeof(struct ctlr_info)); if (sh == NULL) { dev_err(&h->pdev->dev, "scsi_host_alloc failed\n"); return -ENOMEM; From 083b3dda86f81f9493d43e0210e6a9a2cc1bee7a Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Mon, 5 Dec 2022 11:35:57 +0100 Subject: [PATCH 102/143] module: Don't wait for GOING modules commit 0254127ab977e70798707a7a2b757c9f3c971210 upstream. During a system boot, it can happen that the kernel receives a burst of requests to insert the same module but loading it eventually fails during its init call. For instance, udev can make a request to insert a frequency module for each individual CPU when another frequency module is already loaded which causes the init function of the new module to return an error. Since commit 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading"), the kernel waits for modules in MODULE_STATE_GOING state to finish unloading before making another attempt to load the same module. This creates unnecessary work in the described scenario and delays the boot. In the worst case, it can prevent udev from loading drivers for other devices and might cause timeouts of services waiting on them and subsequently a failed boot. This patch attempts a different solution for the problem 6e6de3dee51a was trying to solve. Rather than waiting for the unloading to complete, it returns a different error code (-EBUSY) for modules in the GOING state. This should avoid the error situation that was described in 6e6de3dee51a (user space attempting to load a dependent module because the -EEXIST error code would suggest to user space that the first module had been loaded successfully), while avoiding the delay situation too. This has been tested on linux-next since December 2022 and passes all kmod selftests except test 0009 with module compression enabled but it has been confirmed that this issue has existed and has gone unnoticed since prior to this commit and can also be reproduced without module compression with a simple usleep(5000000) on tools/modprobe.c [0]. These failures are caused by hitting the kernel mod_concurrent_max and can happen either due to a self inflicted kernel module auto-loead DoS somehow or on a system with large CPU count and each CPU count incorrectly triggering many module auto-loads. Both of those issues need to be fixed in-kernel. [0] https://lore.kernel.org/all/Y9A4fiobL6IHp%2F%2FP@bombadil.infradead.org/ Fixes: 6e6de3dee51a ("kernel/module.c: Only return -EEXIST for modules that have finished loading") Co-developed-by: Martin Wilck Signed-off-by: Martin Wilck Signed-off-by: Petr Pavlu Cc: stable@vger.kernel.org Reviewed-by: Petr Mladek [mcgrof: enhance commit log with testing and kmod test result interpretation ] Signed-off-by: Luis Chamberlain Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/kernel/module.c b/kernel/module.c index 6a0fd245c048..33d1dc6d4cd6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3661,7 +3661,8 @@ static bool finished_loading(const char *name) sched_annotate_sleep(); mutex_lock(&module_mutex); mod = find_module_all(name, strlen(name), true); - ret = !mod || mod->state == MODULE_STATE_LIVE; + ret = !mod || mod->state == MODULE_STATE_LIVE + || mod->state == MODULE_STATE_GOING; mutex_unlock(&module_mutex); return ret; @@ -3827,20 +3828,35 @@ static int add_unformed_module(struct module *mod) mod->state = MODULE_STATE_UNFORMED; -again: mutex_lock(&module_mutex); old = find_module_all(mod->name, strlen(mod->name), true); if (old != NULL) { - if (old->state != MODULE_STATE_LIVE) { + if (old->state == MODULE_STATE_COMING + || old->state == MODULE_STATE_UNFORMED) { /* Wait in case it fails to load. */ mutex_unlock(&module_mutex); err = wait_event_interruptible(module_wq, finished_loading(mod->name)); if (err) goto out_unlocked; - goto again; + + /* The module might have gone in the meantime. */ + mutex_lock(&module_mutex); + old = find_module_all(mod->name, strlen(mod->name), + true); } - err = -EEXIST; + + /* + * We are here only when the same module was being loaded. Do + * not try to load it again right now. It prevents long delays + * caused by serialized module load failures. It might happen + * when more devices of the same type trigger load of + * a particular module. + */ + if (old && old->state == MODULE_STATE_LIVE) + err = -EEXIST; + else + err = -EBUSY; goto out; } mod_update_bounds(mod); From de3930a4883ddad2244efd6d349013294c62c75c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 4 Jan 2023 16:14:12 -0500 Subject: [PATCH 103/143] tracing: Make sure trace_printk() can output as soon as it can be used commit 3bb06eb6e9acf7c4a3e1b5bc87aed398ff8e2253 upstream. Currently trace_printk() can be used as soon as early_trace_init() is called from start_kernel(). But if a crash happens, and "ftrace_dump_on_oops" is set on the kernel command line, all you get will be: [ 0.456075] -0 0dN.2. 347519us : Unknown type 6 [ 0.456075] -0 0dN.2. 353141us : Unknown type 6 [ 0.456075] -0 0dN.2. 358684us : Unknown type 6 This is because the trace_printk() event (type 6) hasn't been registered yet. That gets done via an early_initcall(), which may be early, but not early enough. Instead of registering the trace_printk() event (and other ftrace events, which are not trace events) via an early_initcall(), have them registered at the same time that trace_printk() can be used. This way, if there is a crash before early_initcall(), then the trace_printk()s will actually be useful. Link: https://lkml.kernel.org/r/20230104161412.019f6c55@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Fixes: e725c731e3bb1 ("tracing: Split tracing initialization into two for early initialization") Reported-by: "Joel Fernandes (Google)" Tested-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ kernel/trace/trace.h | 1 + kernel/trace/trace_output.c | 3 +-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c7c92b0eed04..f06d48be5a96 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9680,6 +9680,8 @@ void __init early_trace_init(void) static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); + + init_events(); } void __init trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8d67f7f44840..37f616bf5fa9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1673,6 +1673,7 @@ extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); +extern int init_events(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 000e9dc224c6..b3ee8d9b6b62 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1378,7 +1378,7 @@ static struct trace_event *events[] __initdata = { NULL }; -__init static int init_events(void) +__init int init_events(void) { struct trace_event *event; int i, ret; @@ -1396,4 +1396,3 @@ __init static int init_events(void) return 0; } -early_initcall(init_events); From 886aa449235f478e262bbd5dcdee6ed6bc202949 Mon Sep 17 00:00:00 2001 From: Natalia Petrova Date: Wed, 11 Jan 2023 15:04:09 +0300 Subject: [PATCH 104/143] trace_events_hist: add check for return value of 'create_hist_field' commit 8b152e9150d07a885f95e1fd401fc81af202d9a4 upstream. Function 'create_hist_field' is called recursively at trace_events_hist.c:1954 and can return NULL-value that's why we have to check it to avoid null pointer dereference. Found by Linux Verification Center (linuxtesting.org) with SVACE. Link: https://lkml.kernel.org/r/20230111120409.4111-1-n.petrova@fintech.ru Cc: stable@vger.kernel.org Fixes: 30350d65ac56 ("tracing: Add variable support to hist triggers") Signed-off-by: Natalia Petrova Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0ae3e4454ff2..ccc99cd23f3c 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1646,6 +1646,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; hist_field->fn = hist_field_log2; hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + if (!hist_field->operands[0]) + goto free; hist_field->size = hist_field->operands[0]->size; hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); if (!hist_field->type) From c42a6e68706090e4307cd19b662eeead8fa6be3c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 23 Jan 2023 11:22:52 -0500 Subject: [PATCH 105/143] ftrace/scripts: Update the instructions for ftrace-bisect.sh commit 7ae4ba7195b1bac04a4210a499da9d8c63b0ba9c upstream. The instructions for the ftrace-bisect.sh script, which is used to find what function is being traced that is causing a kernel crash, and possibly a triple fault reboot, uses the old method. In 5.1, a new feature was added that let the user write in the index into available_filter_functions that maps to the function a user wants to set in set_ftrace_filter (or set_ftrace_notrace). This takes O(1) to set, as suppose to writing a function name, which takes O(n) (where n is the number of functions in available_filter_functions). The ftrace-bisect.sh requires setting half of the functions in available_filter_functions, which is O(n^2) using the name method to enable and can take several minutes to complete. The number method is O(n) which takes less than a second to complete. Using the number method for any kernel 5.1 and after is the proper way to do the bisect. Update the usage to reflect the new change, as well as using the /sys/kernel/tracing path instead of the obsolete debugfs path. Link: https://lkml.kernel.org/r/20230123112252.022003dd@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Acked-by: Mark Rutland Fixes: f79b3f338564e ("ftrace: Allow enabling of filters via index of available_filter_functions") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- scripts/tracing/ftrace-bisect.sh | 34 ++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/scripts/tracing/ftrace-bisect.sh b/scripts/tracing/ftrace-bisect.sh index 926701162bc8..bb4f59262bbe 100755 --- a/scripts/tracing/ftrace-bisect.sh +++ b/scripts/tracing/ftrace-bisect.sh @@ -12,7 +12,7 @@ # (note, if this is a problem with function_graph tracing, then simply # replace "function" with "function_graph" in the following steps). # -# # cd /sys/kernel/debug/tracing +# # cd /sys/kernel/tracing # # echo schedule > set_ftrace_filter # # echo function > current_tracer # @@ -20,22 +20,40 @@ # # # echo nop > current_tracer # -# # cat available_filter_functions > ~/full-file +# Starting with v5.1 this can be done with numbers, making it much faster: +# +# The old (slow) way, for kernels before v5.1. +# +# [old-way] # cat available_filter_functions > ~/full-file +# +# [old-way] *** Note *** this process will take several minutes to update the +# [old-way] filters. Setting multiple functions is an O(n^2) operation, and we +# [old-way] are dealing with thousands of functions. So go have coffee, talk +# [old-way] with your coworkers, read facebook. And eventually, this operation +# [old-way] will end. +# +# The new way (using numbers) is an O(n) operation, and usually takes less than a second. +# +# seq `wc -l available_filter_functions | cut -d' ' -f1` > ~/full-file +# +# This will create a sequence of numbers that match the functions in +# available_filter_functions, and when echoing in a number into the +# set_ftrace_filter file, it will enable the corresponding function in +# O(1) time. Making enabling all functions O(n) where n is the number of +# functions to enable. +# +# For either the new or old way, the rest of the operations remain the same. +# # # ftrace-bisect ~/full-file ~/test-file ~/non-test-file # # cat ~/test-file > set_ftrace_filter # -# *** Note *** this will take several minutes. Setting multiple functions is -# an O(n^2) operation, and we are dealing with thousands of functions. So go -# have coffee, talk with your coworkers, read facebook. And eventually, this -# operation will end. -# # # echo function > current_tracer # # If it crashes, we know that ~/test-file has a bad function. # # Reboot back to test kernel. # -# # cd /sys/kernel/debug/tracing +# # cd /sys/kernel/tracing # # mv ~/test-file ~/full-file # # If it didn't crash. From e037baee16e0b9ace7e730888fcae9cec11daff2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 25 Jan 2023 14:02:13 +0000 Subject: [PATCH 106/143] cifs: Fix oops due to uncleared server->smbd_conn in reconnect commit b7ab9161cf5ddc42a288edf9d1a61f3bdffe17c7 upstream. In smbd_destroy(), clear the server->smbd_conn pointer after freeing the smbd_connection struct that it points to so that reconnection doesn't get confused. Fixes: 8ef130f9ec27 ("CIFS: SMBD: Implement function to destroy a SMB Direct connection") Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (SUSE) Acked-by: Tom Talpey Signed-off-by: David Howells Cc: Long Li Cc: Pavel Shilovsky Cc: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smbdirect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index b029ed31ef91..f73f9b062525 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -1394,6 +1394,7 @@ void smbd_destroy(struct TCP_Server_Info *server) destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); + server->smbd_conn = NULL; } /* From 76c5640737b82cf0a8d3a001437405c139b12b58 Mon Sep 17 00:00:00 2001 From: Hendrik Borghorst Date: Mon, 14 Nov 2022 16:48:23 +0000 Subject: [PATCH 107/143] KVM: x86/vmx: Do not skip segment attributes if unusable bit is set commit a44b331614e6f7e63902ed7dff7adc8c85edd8bc upstream. When serializing and deserializing kvm_sregs, attributes of the segment descriptors are stored by user space. For unusable segments, vmx_segment_access_rights skips all attributes and sets them to 0. This means we zero out the DPL (Descriptor Privilege Level) for unusable entries. Unusable segments are - contrary to their name - usable in 64bit mode and are used by guests to for example create a linear map through the NULL selector. VMENTER checks if SS.DPL is correct depending on the CS segment type. For types 9 (Execute Only) and 11 (Execute Read), CS.DPL must be equal to SS.DPL [1]. We have seen real world guests setting CS to a usable segment with DPL=3 and SS to an unusable segment with DPL=3. Once we go through an sregs get/set cycle, SS.DPL turns to 0. This causes the virtual machine to crash reproducibly. This commit changes the attribute logic to always preserve attributes for unusable segments. According to [2] SS.DPL is always saved on VM exits, regardless of the unusable bit so user space applications should have saved the information on serialization correctly. [3] specifies that besides SS.DPL the rest of the attributes of the descriptors are undefined after VM entry if unusable bit is set. So, there should be no harm in setting them all to the previous state. [1] Intel SDM Vol 3C 26.3.1.2 Checks on Guest Segment Registers [2] Intel SDM Vol 3C 27.3.2 Saving Segment Registers and Descriptor-Table Registers [3] Intel SDM Vol 3C 26.3.2.2 Loading Guest Segment Registers and Descriptor-Table Registers Cc: Alexander Graf Cc: stable@vger.kernel.org Signed-off-by: Hendrik Borghorst Reviewed-by: Jim Mattson Reviewed-by: Alexander Graf Message-Id: <20221114164823.69555-1-hborghor@amazon.de> Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx/vmx.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index af6742d11ca1..8f7152e158e2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3332,18 +3332,15 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var) { u32 ar; - if (var->unusable || !var->present) - ar = 1 << 16; - else { - ar = var->type & 15; - ar |= (var->s & 1) << 4; - ar |= (var->dpl & 3) << 5; - ar |= (var->present & 1) << 7; - ar |= (var->avl & 1) << 12; - ar |= (var->l & 1) << 13; - ar |= (var->db & 1) << 14; - ar |= (var->g & 1) << 15; - } + ar = var->type & 15; + ar |= (var->s & 1) << 4; + ar |= (var->dpl & 3) << 5; + ar |= (var->present & 1) << 7; + ar |= (var->avl & 1) << 12; + ar |= (var->l & 1) << 13; + ar |= (var->db & 1) << 14; + ar |= (var->g & 1) << 15; + ar |= (var->unusable || !var->present) << 16; return ar; } From 98d85586aace82fbe903f606aa0eb5263df6137a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 23 Jan 2023 09:21:10 -0800 Subject: [PATCH 108/143] thermal: intel: int340x: Protect trip temperature from concurrent updates commit 6757a7abe47bcb12cb2d45661067e182424b0ee3 upstream. Trip temperatures are read using ACPI methods and stored in the memory during zone initializtion and when the firmware sends a notification for change. This trip temperature is returned when the thermal core calls via callback get_trip_temp(). But it is possible that while updating the memory copy of the trips when the firmware sends a notification for change, thermal core is reading the trip temperature via the callback get_trip_temp(). This may return invalid trip temperature. To address this add a mutex to protect the invalid temperature reads in the callback get_trip_temp() and int340x_thermal_read_trips(). Fixes: 5fbf7f27fa3d ("Thermal/int340x: Add common thermal zone handler") Signed-off-by: Srinivas Pandruvada Cc: 5.0+ # 5.0+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- .../int340x_thermal/int340x_thermal_zone.c | 18 +++++++++++++++--- .../int340x_thermal/int340x_thermal_zone.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index a337600d5bc4..c9e0050bce17 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -44,11 +44,13 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone, int trip, int *temp) { struct int34x_thermal_zone *d = zone->devdata; - int i; + int i, ret = 0; if (d->override_ops && d->override_ops->get_trip_temp) return d->override_ops->get_trip_temp(zone, trip, temp); + mutex_lock(&d->trip_mutex); + if (trip < d->aux_trip_nr) *temp = d->aux_trips[trip]; else if (trip == d->crt_trip_id) @@ -66,10 +68,12 @@ static int int340x_thermal_get_trip_temp(struct thermal_zone_device *zone, } } if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT) - return -EINVAL; + ret = -EINVAL; } - return 0; + mutex_unlock(&d->trip_mutex); + + return ret; } static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, @@ -174,6 +178,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone) int trip_cnt = int34x_zone->aux_trip_nr; int i; + mutex_lock(&int34x_zone->trip_mutex); + int34x_zone->crt_trip_id = -1; if (!int340x_thermal_get_trip_config(int34x_zone->adev->handle, "_CRT", &int34x_zone->crt_temp)) @@ -201,6 +207,8 @@ int int340x_thermal_read_trips(struct int34x_thermal_zone *int34x_zone) int34x_zone->act_trips[i].valid = true; } + mutex_unlock(&int34x_zone->trip_mutex); + return trip_cnt; } EXPORT_SYMBOL_GPL(int340x_thermal_read_trips); @@ -224,6 +232,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, if (!int34x_thermal_zone) return ERR_PTR(-ENOMEM); + mutex_init(&int34x_thermal_zone->trip_mutex); + int34x_thermal_zone->adev = adev; int34x_thermal_zone->override_ops = override_ops; @@ -275,6 +285,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); kfree(int34x_thermal_zone->aux_trips); err_trip_alloc: + mutex_destroy(&int34x_thermal_zone->trip_mutex); kfree(int34x_thermal_zone); return ERR_PTR(ret); } @@ -286,6 +297,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone thermal_zone_device_unregister(int34x_thermal_zone->zone); acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); kfree(int34x_thermal_zone->aux_trips); + mutex_destroy(&int34x_thermal_zone->trip_mutex); kfree(int34x_thermal_zone); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h index 3b4971df1b33..8f9872afd0d3 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h @@ -32,6 +32,7 @@ struct int34x_thermal_zone { struct thermal_zone_device_ops *override_ops; void *priv_data; struct acpi_lpat_conversion_table *lpat_table; + struct mutex trip_mutex; }; struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *, From 0cb922cef7e93a565e8eeccf810a863a24be9fd6 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Tue, 13 Dec 2022 20:24:03 +0100 Subject: [PATCH 109/143] ARM: 9280/1: mm: fix warning on phys_addr_t to void pointer assignment commit a4e03921c1bb118e6718e0a3b0322a2c13ed172b upstream. zero_page is a void* pointer but memblock_alloc() returns phys_addr_t type so this generates a warning while using clang and with -Wint-error enabled that becomes and error. So let's cast the return of memblock_alloc() to (void *). Cc: # 4.14.x + Fixes: 340a982825f7 ("ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation") Signed-off-by: Giulio Benetti Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 959f05701738..c4d5b3bacf64 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -161,7 +161,7 @@ void __init paging_init(const struct machine_desc *mdesc) mpu_setup(); /* allocate the zero page. */ - zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!zero_page) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", __func__, PAGE_SIZE, PAGE_SIZE); From 511f6c7c40b03685779a3cfe49c9f25b383e3c25 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 18 Jan 2023 20:38:48 +0530 Subject: [PATCH 110/143] EDAC/device: Respect any driver-supplied workqueue polling value commit cec669ff716cc83505c77b242aecf6f7baad869d upstream. The EDAC drivers may optionally pass the poll_msec value. Use that value if available, else fall back to 1000ms. [ bp: Touchups. ] Fixes: e27e3dac6517 ("drivers/edac: add edac_device class") Reported-by: Luca Weiss Signed-off-by: Manivannan Sadhasivam Signed-off-by: Borislav Petkov (AMD) Tested-by: Steev Klimaszewski # Thinkpad X13s Tested-by: Andrew Halaney # sa8540p-ride Cc: # 4.9 Link: https://lore.kernel.org/r/COZYL8MWN97H.MROQ391BGA09@otso Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 8220ce5b87ca..85c229985f90 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -34,6 +34,9 @@ static DEFINE_MUTEX(device_ctls_mutex); static LIST_HEAD(edac_device_list); +/* Default workqueue processing interval on this instance, in msecs */ +#define DEFAULT_POLL_INTERVAL 1000 + #ifdef CONFIG_EDAC_DEBUG static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev) { @@ -366,7 +369,7 @@ static void edac_device_workq_function(struct work_struct *work_req) * whole one second to save timers firing all over the period * between integral seconds */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_queue_work(&edac_dev->work, edac_dev->delay); @@ -396,7 +399,7 @@ static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, * timers firing on sub-second basis, while they are happy * to fire together on the 1 second exactly */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_queue_work(&edac_dev->work, edac_dev->delay); @@ -430,7 +433,7 @@ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, edac_dev->delay = msecs_to_jiffies(msec); /* See comment in edac_device_workq_setup() above */ - if (edac_dev->poll_msec == 1000) + if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL) edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); else edac_mod_work(&edac_dev->work, edac_dev->delay); @@ -472,11 +475,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev) /* This instance is NOW RUNNING */ edac_dev->op_state = OP_RUNNING_POLL; - /* - * enable workq processing on this instance, - * default = 1000 msec - */ - edac_device_workq_setup(edac_dev, 1000); + edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL); } else { edac_dev->op_state = OP_RUNNING_INTERRUPT; } From 76d9ebb7f0bc10fbc78b6d576751552edf743968 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Wed, 18 Jan 2023 20:38:50 +0530 Subject: [PATCH 111/143] EDAC/qcom: Do not pass llcc_driv_data as edac_device_ctl_info's pvt_info commit 977c6ba624f24ae20cf0faee871257a39348d4a9 upstream. The memory for llcc_driv_data is allocated by the LLCC driver. But when it is passed as the private driver info to the EDAC core, it will get freed during the qcom_edac driver release. So when the qcom_edac driver gets probed again, it will try to use the freed data leading to the use-after-free bug. Hence, do not pass llcc_driv_data as pvt_info but rather reference it using the platform_data pointer in the qcom_edac driver. Fixes: 27450653f1db ("drivers: edac: Add EDAC driver support for QCOM SoCs") Reported-by: Steev Klimaszewski Signed-off-by: Manivannan Sadhasivam Signed-off-by: Borislav Petkov (AMD) Tested-by: Steev Klimaszewski # Thinkpad X13s Tested-by: Andrew Halaney # sa8540p-ride Cc: # 4.20 Link: https://lore.kernel.org/r/20230118150904.26913-4-manivannan.sadhasivam@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/edac/qcom_edac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index 97a27e42dd61..c45519f59dc1 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -252,7 +252,7 @@ dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type) static int dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank) { - struct llcc_drv_data *drv = edev_ctl->pvt_info; + struct llcc_drv_data *drv = edev_ctl->dev->platform_data; int ret; ret = dump_syn_reg_values(drv, bank, err_type); @@ -289,7 +289,7 @@ static irqreturn_t llcc_ecc_irq_handler(int irq, void *edev_ctl) { struct edac_device_ctl_info *edac_dev_ctl = edev_ctl; - struct llcc_drv_data *drv = edac_dev_ctl->pvt_info; + struct llcc_drv_data *drv = edac_dev_ctl->dev->platform_data; irqreturn_t irq_rc = IRQ_NONE; u32 drp_error, trp_error, i; int ret; @@ -358,7 +358,6 @@ static int qcom_llcc_edac_probe(struct platform_device *pdev) edev_ctl->dev_name = dev_name(dev); edev_ctl->ctl_name = "llcc"; edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE; - edev_ctl->pvt_info = llcc_driv_data; rc = edac_device_add_device(edev_ctl); if (rc) From 974aaf11804f658aad8b03e3950e41c80efd04a8 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 8 Dec 2020 17:41:42 +0100 Subject: [PATCH 112/143] units: Add Watt units [ Upstream commit 2ee5f8f05949735fa2f4c463a5e13fcb3660c719 ] As there are the temperature units, let's add the Watt macros definition. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Signed-off-by: Rafael J. Wysocki Stable-dep-of: c8c37bc51451 ("i2c: designware: use casting of u64 in clock multiplication to avoid overflow") Signed-off-by: Sasha Levin --- include/linux/units.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/units.h b/include/linux/units.h index aaf716364ec3..92c234e71cab 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -4,6 +4,10 @@ #include +#define MILLIWATT_PER_WATT 1000L +#define MICROWATT_PER_MILLIWATT 1000L +#define MICROWATT_PER_WATT 1000000L + #define ABSOLUTE_ZERO_MILLICELSIUS -273150 static inline long milli_kelvin_to_millicelsius(long t) From 8ebc2efcb6d6731c12a3c3b18f9c2d17b651e888 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Jul 2021 17:20:25 +0300 Subject: [PATCH 113/143] units: Add SI metric prefix definitions [ Upstream commit 26471d4a6cf8d5d0bd0fb55c7169de7d67cc703a ] Sometimes it's useful to have well-defined SI metric prefix to be used to self-describe the formulas or equations. List most popular ones in the units.h. Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang Stable-dep-of: c8c37bc51451 ("i2c: designware: use casting of u64 in clock multiplication to avoid overflow") Signed-off-by: Sasha Levin --- include/linux/units.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/linux/units.h b/include/linux/units.h index 92c234e71cab..3457179f7116 100644 --- a/include/linux/units.h +++ b/include/linux/units.h @@ -4,6 +4,22 @@ #include +/* Metric prefixes in accordance with Système international (d'unités) */ +#define PETA 1000000000000000ULL +#define TERA 1000000000000ULL +#define GIGA 1000000000UL +#define MEGA 1000000UL +#define KILO 1000UL +#define HECTO 100UL +#define DECA 10UL +#define DECI 10UL +#define CENTI 100UL +#define MILLI 1000UL +#define MICRO 1000000UL +#define NANO 1000000000UL +#define PICO 1000000000000ULL +#define FEMTO 1000000000000000ULL + #define MILLIWATT_PER_WATT 1000L #define MICROWATT_PER_MILLIWATT 1000L #define MICROWATT_PER_WATT 1000000L From 8949ef3a7a0c938e3813f79085af97f32d060347 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Jul 2021 17:20:26 +0300 Subject: [PATCH 114/143] i2c: designware: Use DIV_ROUND_CLOSEST() macro [ Upstream commit c045214a0f31dd5d6be716ed2f119b57b6c5d3a2 ] Instead of open-coding DIV_ROUND_CLOSEST() and similar use the macros directly. While at it, replace numbers with predefined SI metric prefixes. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang Stable-dep-of: c8c37bc51451 ("i2c: designware: use casting of u64 in clock multiplication to avoid overflow") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-designware-common.c | 8 ++++---- drivers/i2c/busses/i2c-designware-platdrv.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 9468c6c89b3f..73a4ef8130e6 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "i2c-designware-core.h" @@ -347,7 +348,7 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * * If your hardware is free from tHD;STA issue, try this one. */ - return (ic_clk * tSYMBOL + 500000) / 1000000 - 8 + offset; + return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset; else /* * Conditional expression: @@ -363,8 +364,7 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * The reason why we need to take into account "tf" here, * is the same as described in i2c_dw_scl_lcnt(). */ - return (ic_clk * (tSYMBOL + tf) + 500000) / 1000000 - - 3 + offset; + return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset; } u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) @@ -380,7 +380,7 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) * account the fall time of SCL signal (tf). Default tf value * should be 0.3 us, for safety. */ - return ((ic_clk * (tLOW + tf) + 500000) / 1000000) - 1 + offset; + return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset; } int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index ad91c7c0faa5..474754151725 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -32,12 +32,13 @@ #include #include #include +#include #include "i2c-designware-core.h" static u32 i2c_dw_get_clk_rate_khz(struct dw_i2c_dev *dev) { - return clk_get_rate(dev->clk)/1000; + return clk_get_rate(dev->clk) / KILO; } #ifdef CONFIG_ACPI @@ -284,7 +285,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) if (!dev->sda_hold_time && t->sda_hold_ns) dev->sda_hold_time = - div_u64(clk_khz * t->sda_hold_ns + 500000, 1000000); + DIV_S64_ROUND_CLOSEST(clk_khz * t->sda_hold_ns, MICRO); } adap = &dev->adapter; From ed173f77fd28a3e4fffc13b3f28687b9eba61157 Mon Sep 17 00:00:00 2001 From: Lareine Khawaly Date: Wed, 21 Dec 2022 19:59:00 +0000 Subject: [PATCH 115/143] i2c: designware: use casting of u64 in clock multiplication to avoid overflow [ Upstream commit c8c37bc514514999e62a17e95160ed9ebf75ca8d ] In functions i2c_dw_scl_lcnt() and i2c_dw_scl_hcnt() may have overflow by depending on the values of the given parameters including the ic_clk. For example in our use case where ic_clk is larger than one million, multiplication of ic_clk * 4700 will result in 32 bit overflow. Add cast of u64 to the calculation to avoid multiplication overflow, and use the corresponding define for divide. Fixes: 2373f6b9744d ("i2c-designware: split of i2c-designware.c into core and bus specific parts") Signed-off-by: Lareine Khawaly Signed-off-by: Hanna Hawa Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-designware-common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index 73a4ef8130e6..682fffaab2b4 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -348,7 +348,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * * If your hardware is free from tHD;STA issue, try this one. */ - return DIV_ROUND_CLOSEST(ic_clk * tSYMBOL, MICRO) - 8 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * tSYMBOL, MICRO) - + 8 + offset; else /* * Conditional expression: @@ -364,7 +365,8 @@ u32 i2c_dw_scl_hcnt(u32 ic_clk, u32 tSYMBOL, u32 tf, int cond, int offset) * The reason why we need to take into account "tf" here, * is the same as described in i2c_dw_scl_lcnt(). */ - return DIV_ROUND_CLOSEST(ic_clk * (tSYMBOL + tf), MICRO) - 3 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tSYMBOL + tf), MICRO) - + 3 + offset; } u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) @@ -380,7 +382,8 @@ u32 i2c_dw_scl_lcnt(u32 ic_clk, u32 tLOW, u32 tf, int offset) * account the fall time of SCL signal (tf). Default tf value * should be 0.3 us, for safety. */ - return DIV_ROUND_CLOSEST(ic_clk * (tLOW + tf), MICRO) - 1 + offset; + return DIV_ROUND_CLOSEST_ULL((u64)ic_clk * (tLOW + tf), MICRO) - + 1 + offset; } int i2c_dw_set_sda_hold(struct dw_i2c_dev *dev) From 539ca5dcbc91134bbe2c45677811c31d8b030d2d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Jan 2023 11:01:50 +0000 Subject: [PATCH 116/143] netlink: prevent potential spectre v1 gadgets [ Upstream commit f0950402e8c76e7dcb08563f1b4e8000fbc62455 ] Most netlink attributes are parsed and validated from __nla_validate_parse() or validate_nla() u16 type = nla_type(nla); if (type == 0 || type > maxtype) { /* error or continue */ } @type is then used as an array index and can be used as a Spectre v1 gadget. array_index_nospec() can be used to prevent leaking content of kernel memory to malicious users. This should take care of vast majority of netlink uses, but an audit is needed to take care of others where validation is not yet centralized in core netlink functions. Fixes: bfa83a9e03cf ("[NETLINK]: Type-safe netlink messages/attributes interface") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230119110150.2678537-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- lib/nlattr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/nlattr.c b/lib/nlattr.c index fe60f9ae9db1..aa8fc4371e93 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -369,6 +370,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (type <= 0 || type > maxtype) return 0; + type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -584,6 +586,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, } continue; } + type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); From d4c008f3b7f7d4ffd311eb2dae5e75b3cbddacd0 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 19 Jan 2023 19:55:45 +0100 Subject: [PATCH 117/143] net: fix UaF in netns ops registration error path [ Upstream commit 71ab9c3e2253619136c31c89dbb2c69305cc89b1 ] If net_assign_generic() fails, the current error path in ops_init() tries to clear the gen pointer slot. Anyway, in such error path, the gen pointer itself has not been modified yet, and the existing and accessed one is smaller than the accessed index, causing an out-of-bounds error: BUG: KASAN: slab-out-of-bounds in ops_init+0x2de/0x320 Write of size 8 at addr ffff888109124978 by task modprobe/1018 CPU: 2 PID: 1018 Comm: modprobe Not tainted 6.2.0-rc2.mptcp_ae5ac65fbed5+ #1641 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014 Call Trace: dump_stack_lvl+0x6a/0x9f print_address_description.constprop.0+0x86/0x2b5 print_report+0x11b/0x1fb kasan_report+0x87/0xc0 ops_init+0x2de/0x320 register_pernet_operations+0x2e4/0x750 register_pernet_subsys+0x24/0x40 tcf_register_action+0x9f/0x560 do_one_initcall+0xf9/0x570 do_init_module+0x190/0x650 load_module+0x1fa5/0x23c0 __do_sys_finit_module+0x10d/0x1b0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f42518f778d Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d cb 56 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007fff96869688 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 00005568ef7f7c90 RCX: 00007f42518f778d RDX: 0000000000000000 RSI: 00005568ef41d796 RDI: 0000000000000003 RBP: 00005568ef41d796 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000003 R11: 0000000000000246 R12: 0000000000000000 R13: 00005568ef7f7d30 R14: 0000000000040000 R15: 0000000000000000 This change addresses the issue by skipping the gen pointer de-reference in the mentioned error-path. Found by code inspection and verified with explicit error injection on a kasan-enabled kernel. Fixes: d266935ac43d ("net: fix UAF issue in nfqnl_nf_hook_drop() when ops_init() failed") Signed-off-by: Paolo Abeni Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/cec4e0f3bb2c77ac03a6154a8508d3930beb5f0f.1674154348.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/net_namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index a3b7d965e9c0..e05dd4f3279a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -155,12 +155,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) return 0; if (ops->id && ops->size) { -cleanup: ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } +cleanup: kfree(data); out: From 4aacf3d78424293e318c616016865380b37b9cc5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 14 Jan 2023 23:38:32 +0100 Subject: [PATCH 118/143] netfilter: nft_set_rbtree: Switch to node list walk for overlap detection [ Upstream commit c9e6978e2725a7d4b6cd23b2facd3f11422c0643 ] ...instead of a tree descent, which became overly complicated in an attempt to cover cases where expired or inactive elements would affect comparisons with the new element being inserted. Further, it turned out that it's probably impossible to cover all those cases, as inactive nodes might entirely hide subtrees consisting of a complete interval plus a node that makes the current insertion not overlap. To speed up the overlap check, descent the tree to find a greater element that is closer to the key value to insert. Then walk down the node list for overlap detection. Starting the overlap check from rb_first() unconditionally is slow, it takes 10 times longer due to the full linear traversal of the list. Moreover, perform garbage collection of expired elements when walking down the node list to avoid bogus overlap reports. For the insertion operation itself, this essentially reverts back to the implementation before commit 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion"), except that cases of complete overlap are already handled in the overlap detection phase itself, which slightly simplifies the loop to find the insertion point. Based on initial patch from Stefano Brivio, including text from the original patch description too. Fixes: 7c84d41416d8 ("netfilter: nft_set_rbtree: Detect partial overlaps on insertion") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_rbtree.c | 322 ++++++++++++++++++++------------- 1 file changed, 192 insertions(+), 130 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 94a5446c5eae..81c747008da3 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) return !nft_rbtree_interval_end(rbe); } -static bool nft_rbtree_equal(const struct nft_set *set, const void *this, - const struct nft_rbtree_elem *interval) +static int nft_rbtree_cmp(const struct nft_set *set, + const struct nft_rbtree_elem *e1, + const struct nft_rbtree_elem *e2) { - return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; + return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), + set->klen); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - const void *this; int d; parent = rcu_dereference_raw(priv->root.rb_node); @@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set rbe = rb_entry(parent, struct nft_rbtree_elem, node); - this = nft_set_ext_key(&rbe->ext); - d = memcmp(this, key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (interval && - nft_rbtree_equal(set, this, interval) && + !nft_rbtree_cmp(set, rbe, interval) && nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; @@ -214,154 +214,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, return rbe; } +static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); + struct nft_rbtree_elem *rbe_prev; + struct nft_set_gc_batch *gcb; + + gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); + if (!gcb) + return -ENOMEM; + + /* search for expired end interval coming before this element. */ + do { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev)) + break; + + prev = rb_prev(prev); + } while (prev != NULL); + + rb_erase(&rbe_prev->node, &priv->root); + rb_erase(&rbe->node, &priv->root); + atomic_sub(2, &set->nelems); + + nft_set_gc_batch_add(gcb, rbe); + nft_set_gc_batch_complete(gcb); + + return 0; +} + +static bool nft_rbtree_update_first(const struct nft_set *set, + struct nft_rbtree_elem *rbe, + struct rb_node *first) +{ + struct nft_rbtree_elem *first_elem; + + first_elem = rb_entry(first, struct nft_rbtree_elem, node); + /* this element is closest to where the new element is to be inserted: + * update the first element for the node list path. + */ + if (nft_rbtree_cmp(set, rbe, first_elem) < 0) + return true; + + return false; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) { - bool overlap = false, dup_end_left = false, dup_end_right = false; + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; + struct rb_node *node, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); - struct nft_rbtree_elem *rbe; - struct rb_node *parent, **p; - int d; + int d, err; - /* Detect overlaps as we descend the tree. Set the flag in these cases: - * - * a1. _ _ __>| ?_ _ __| (insert end before existing end) - * a2. _ _ ___| ?_ _ _>| (insert end after existing end) - * a3. _ _ ___? >|_ _ __| (insert start before existing end) - * - * and clear it later on, as we eventually reach the points indicated by - * '?' above, in the cases described below. We'll always meet these - * later, locally, due to tree ordering, and overlaps for the intervals - * that are the closest together are always evaluated last. - * - * b1. _ _ __>| !_ _ __| (insert end before existing start) - * b2. _ _ ___| !_ _ _>| (insert end after existing start) - * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) - * '--' no nodes falling in this range - * b4. >|_ _ ! (insert start before existing start) - * - * Case a3. resolves to b3.: - * - if the inserted start element is the leftmost, because the '0' - * element in the tree serves as end element - * - otherwise, if an existing end is found immediately to the left. If - * there are existing nodes in between, we need to further descend the - * tree before we can conclude the new start isn't causing an overlap - * - * or to b4., which, preceded by a3., means we already traversed one or - * more existing intervals entirely, from the right. - * - * For a new, rightmost pair of elements, we'll hit cases b3. and b2., - * in that order. - * - * The flag is also cleared in two special cases: - * - * b5. |__ _ _!|<_ _ _ (insert start right before existing end) - * b6. |__ _ >|!__ _ _ (insert end right after existing start) - * - * which always happen as last step and imply that no further - * overlapping is possible. - * - * Another special case comes from the fact that start elements matching - * an already existing start element are allowed: insertion is not - * performed but we return -EEXIST in that case, and the error will be - * cleared by the caller if NLM_F_EXCL is not present in the request. - * This way, request for insertion of an exact overlap isn't reported as - * error to userspace if not desired. - * - * However, if the existing start matches a pre-existing start, but the - * end element doesn't match the corresponding pre-existing end element, - * we need to report a partial overlap. This is a local condition that - * can be noticed without need for a tracking flag, by checking for a - * local duplicated end for a corresponding start, from left and right, - * separately. + /* Descend the tree to search for an existing element greater than the + * key value to insert that is greater than the new element. This is the + * first element to walk the ordered elements to find possible overlap. */ - parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), - nft_set_ext_key(&new->ext), - set->klen); + d = nft_rbtree_cmp(set, rbe, new); + if (d < 0) { p = &parent->rb_left; - - if (nft_rbtree_interval_start(new)) { - if (nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext) && !*p) - overlap = false; - } else { - if (dup_end_left && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_right = true; - continue; - } - } } else if (d > 0) { + if (!first || + nft_rbtree_update_first(set, rbe, first)) + first = &rbe->node; + p = &parent->rb_right; - - if (nft_rbtree_interval_end(new)) { - if (dup_end_right && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_left = true; - continue; - } - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - overlap = nft_rbtree_interval_end(rbe); - } } else { - if (nft_rbtree_interval_end(rbe) && - nft_rbtree_interval_start(new)) { + if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; - - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_rbtree_interval_start(rbe) && - nft_rbtree_interval_end(new)) { + else p = &parent->rb_right; - - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - *ext = &rbe->ext; - return -EEXIST; - } else { - overlap = false; - if (nft_rbtree_interval_end(rbe)) - p = &parent->rb_left; - else - p = &parent->rb_right; - } } - - dup_end_left = dup_end_right = false; } - if (overlap) + if (!first) + first = rb_first(&priv->root); + + /* Detect overlap by going through the list of valid tree nodes. + * Values stored in the tree are in reversed order, starting from + * highest to lowest value. + */ + for (node = first; node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); + + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* perform garbage collection to avoid bogus overlap reports. */ + if (nft_set_elem_expired(&rbe->ext)) { + err = nft_rbtree_gc_elem(set, priv, rbe); + if (err < 0) + return err; + + continue; + } + + d = nft_rbtree_cmp(set, rbe, new); + if (d == 0) { + /* Matching end element: no need to look for an + * overlapping greater or equal element. + */ + if (nft_rbtree_interval_end(rbe)) { + rbe_le = rbe; + break; + } + + /* first element that is greater or equal to key value. */ + if (!rbe_ge) { + rbe_ge = rbe; + continue; + } + + /* this is a closer more or equal element, update it. */ + if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { + rbe_ge = rbe; + continue; + } + + /* element is equal to key value, make sure flags are + * the same, an existing more or equal start element + * must not be replaced by more or equal end element. + */ + if ((nft_rbtree_interval_start(new) && + nft_rbtree_interval_start(rbe_ge)) || + (nft_rbtree_interval_end(new) && + nft_rbtree_interval_end(rbe_ge))) { + rbe_ge = rbe; + continue; + } + } else if (d > 0) { + /* annotate element greater than the new element. */ + rbe_ge = rbe; + continue; + } else if (d < 0) { + /* annotate element less than the new element. */ + rbe_le = rbe; + break; + } + } + + /* - new start element matching existing start element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && + nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { + *ext = &rbe_ge->ext; + return -EEXIST; + } + + /* - new end element matching existing end element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && + nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { + *ext = &rbe_le->ext; + return -EEXIST; + } + + /* - new start element with existing closest, less or equal key value + * being a start element: partial overlap, reported as -ENOTEMPTY. + * Anonymous sets allow for two consecutive start element since they + * are constant, skip them to avoid bogus overlap reports. + */ + if (!nft_set_is_anonymous(set) && rbe_le && + nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) return -ENOTEMPTY; + /* - new end element with existing closest, less or equal key value + * being a end element: partial overlap, reported as -ENOTEMPTY. + */ + if (rbe_le && + nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* - new end element with existing closest, greater or equal key value + * being an end element: partial overlap, reported as -ENOTEMPTY + */ + if (rbe_ge && + nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* Accepted element: pick insertion point depending on key value */ + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_rbtree_cmp(set, rbe, new); + + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; From 0308b7dfea6f9f9fb92e96216126dabff25aa510 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 14 Jan 2023 23:49:46 +0100 Subject: [PATCH 119/143] netfilter: nft_set_rbtree: skip elements in transaction from garbage collection [ Upstream commit 5d235d6ce75c12a7fdee375eb211e4116f7ab01b ] Skip interference with an ongoing transaction, do not perform garbage collection on inactive elements. Reset annotated previous end interval if the expired element is marked as busy (control plane removed the element right before expiration). Fixes: 8d8540c4f5e0 ("netfilter: nft_set_rbtree: add timeout support") Reviewed-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_set_rbtree.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 81c747008da3..4b9a499fe8f4 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -562,23 +562,37 @@ static void nft_rbtree_gc(struct work_struct *work) struct nft_rbtree *priv; struct rb_node *node; struct nft_set *set; + struct net *net; + u8 genmask; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + genmask = nft_genmask_cur(net); write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + if (nft_set_elem_mark_busy(&rbe->ext)) { + rbe_end = NULL; continue; + } if (rbe_prev) { rb_erase(&rbe_prev->node, &priv->root); From eccb532adabc9f9419a115fa79e47abeb48fe49e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:53 +0000 Subject: [PATCH 120/143] netlink: annotate data races around nlk->portid [ Upstream commit c1bb9484e3b05166880da8574504156ccbd0549e ] syzbot reminds us netlink_getname() runs locklessly [1] This first patch annotates the race against nlk->portid. Following patches take care of the remaining races. [1] BUG: KCSAN: data-race in netlink_getname / netlink_insert write to 0xffff88814176d310 of 4 bytes by task 2315 on cpu 1: netlink_insert+0xf1/0x9a0 net/netlink/af_netlink.c:583 netlink_autobind+0xae/0x180 net/netlink/af_netlink.c:856 netlink_sendmsg+0x444/0x760 net/netlink/af_netlink.c:1895 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2476 ___sys_sendmsg net/socket.c:2530 [inline] __sys_sendmsg+0x19a/0x230 net/socket.c:2559 __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffff88814176d310 of 4 bytes by task 2316 on cpu 0: netlink_getname+0xcd/0x1a0 net/netlink/af_netlink.c:1144 __sys_getsockname+0x11d/0x1b0 net/socket.c:2026 __do_sys_getsockname net/socket.c:2041 [inline] __se_sys_getsockname net/socket.c:2038 [inline] __x64_sys_getsockname+0x3e/0x50 net/socket.c:2038 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x00000000 -> 0xc9a49780 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 2316 Comm: syz-executor.2 Not tainted 6.2.0-rc3-syzkaller-00030-ge8f60cd7db24-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d96a610929d9..438f45222ca5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -570,7 +570,9 @@ static int netlink_insert(struct sock *sk, u32 portid) if (nlk_sk(sk)->bound) goto err; - nlk_sk(sk)->portid = portid; + /* portid can be read locklessly from netlink_getname(). */ + WRITE_ONCE(nlk_sk(sk)->portid, portid); + sock_hold(sk); err = __netlink_insert(table, sk); @@ -1124,7 +1126,8 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->portid; + /* Paired with WRITE_ONCE() in netlink_insert() */ + nladdr->nl_pid = READ_ONCE(nlk->portid); netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); From 8583f52c23c33b411a9c2e1546d6761f41e44d69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:54 +0000 Subject: [PATCH 121/143] netlink: annotate data races around dst_portid and dst_group [ Upstream commit 004db64d185a5f23dfb891d7701e23713b2420ee ] netlink_getname(), netlink_sendmsg() and netlink_getsockbyportid() can read nlk->dst_portid and nlk->dst_group while another thread is changing them. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 438f45222ca5..cdecd1977583 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1082,8 +1082,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_portid = 0; - nlk->dst_group = 0; + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, 0); + WRITE_ONCE(nlk->dst_group, 0); return 0; } if (addr->sa_family != AF_NETLINK) @@ -1105,8 +1106,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_portid = nladdr->nl_pid; - nlk->dst_group = ffs(nladdr->nl_groups); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); + WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); } return err; @@ -1123,8 +1125,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pad = 0; if (peer) { - nladdr->nl_pid = nlk->dst_portid; - nladdr->nl_groups = netlink_group_mask(nlk->dst_group); + /* Paired with WRITE_ONCE() in netlink_connect() */ + nladdr->nl_pid = READ_ONCE(nlk->dst_portid); + nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); } else { /* Paired with WRITE_ONCE() in netlink_insert() */ nladdr->nl_pid = READ_ONCE(nlk->portid); @@ -1154,8 +1157,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); + /* dst_portid can be changed in netlink_connect() */ if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_portid != nlk_sk(ssk)->portid) { + READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -1891,8 +1895,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; netlink_skb_flags |= NETLINK_SKB_DST; } else { - dst_portid = nlk->dst_portid; - dst_group = nlk->dst_group; + /* Paired with WRITE_ONCE() in netlink_connect() */ + dst_portid = READ_ONCE(nlk->dst_portid); + dst_group = READ_ONCE(nlk->dst_group); } /* Paired with WRITE_ONCE() in netlink_insert() */ From 870a565bd6fecacf5be818da962d380184b9401b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 12:59:55 +0000 Subject: [PATCH 122/143] netlink: annotate data races around sk_state [ Upstream commit 9b663b5cbb15b494ef132a3c937641c90646eb73 ] netlink_getsockbyportid() reads sk_state while a concurrent netlink_connect() can change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cdecd1977583..2104fbdd63d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1081,7 +1081,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (addr->sa_family == AF_UNSPEC) { - sk->sk_state = NETLINK_UNCONNECTED; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, 0); WRITE_ONCE(nlk->dst_group, 0); @@ -1105,7 +1106,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, err = netlink_autobind(sock); if (err == 0) { - sk->sk_state = NETLINK_CONNECTED; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); /* dst_portid and dst_group can be read locklessly */ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); @@ -1157,8 +1159,8 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - /* dst_portid can be changed in netlink_connect() */ - if (sock->sk_state == NETLINK_CONNECTED && + /* dst_portid and sk_state can be changed in netlink_connect() */ + if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); From 34c6142f0df9cd75cba5a7aa9df0960d2854b415 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 13:30:40 +0000 Subject: [PATCH 123/143] ipv4: prevent potential spectre v1 gadget in ip_metrics_convert() [ Upstream commit 1d1d63b612801b3f0a39b7d4467cad0abd60e5c8 ] if (!type) continue; if (type > RTAX_MAX) return -EINVAL; ... metrics[type - 1] = val; @type being used as an array index, we need to prevent cpu speculation or risk leaking kernel memory content. Fixes: 6cf9dfd3bd62 ("net: fib: move metrics parsing to a helper") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230120133040.3623463-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/metrics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 3205d5f7c8c9..4966ac2aaf87 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include #include #include #include @@ -28,6 +29,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, return -EINVAL; } + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; From 7f9828fb1f688210e681268490576f0ca65c322a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Jan 2023 13:31:40 +0000 Subject: [PATCH 124/143] ipv4: prevent potential spectre v1 gadget in fib_metrics_match() [ Upstream commit 5e9398a26a92fc402d82ce1f97cc67d832527da0 ] if (!type) continue; if (type > RTAX_MAX) return false; ... fi_val = fi->fib_metrics->metrics[type - 1]; @type being used as an array index, we need to prevent cpu speculation or risk leaking kernel memory content. Fixes: 5f9ae3d9e7e4 ("ipv4: do metrics match when looking up and deleting a route") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230120133140.3624204-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ab9fcc6231b8..4e94796ccdbd 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1021,6 +1022,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) if (type > RTAX_MAX) return false; + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; bool ecn_ca = false; From 498584ccf46c1996aa4d5677ae7a6417632b9e10 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Tue, 24 Jan 2023 02:47:18 +0100 Subject: [PATCH 125/143] netfilter: conntrack: fix vtag checks for ABORT/SHUTDOWN_COMPLETE [ Upstream commit a9993591fa94246b16b444eea55d84c54608282a ] RFC 9260, Sec 8.5.1 states that for ABORT/SHUTDOWN_COMPLETE, the chunk MUST be accepted if the vtag of the packet matches its own tag and the T bit is not set OR if it is set to its peer's vtag and the T bit is set in chunk flags. Otherwise the packet MUST be silently dropped. Update vtag verification for ABORT/SHUTDOWN_COMPLETE based on the above description. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_sctp.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 7626f3e1c70a..72d0aa603cd6 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -412,22 +412,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ + /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir]) + /* (B) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir] && - sch->flags & SCTP_CHUNK_FLAG_T) + /* (C) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ + /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; } else if (sch->type == SCTP_CID_HEARTBEAT) { From 7de16d75b20ab13b75a7291f449a1b00090edfea Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 20 Jan 2023 15:19:27 -0800 Subject: [PATCH 126/143] netrom: Fix use-after-free of a listening socket. [ Upstream commit 409db27e3a2eb5e8ef7226ca33be33361b3ed1c9 ] syzbot reported a use-after-free in do_accept(), precisely nr_accept() as sk_prot_alloc() allocated the memory and sock_put() frees it. [0] The issue could happen if the heartbeat timer is fired and nr_heartbeat_expiry() calls nr_destroy_socket(), where a socket has SOCK_DESTROY or a listening socket has SOCK_DEAD. In this case, the first condition cannot be true. SOCK_DESTROY is flagged in nr_release() only when the file descriptor is close()d, but accept() is being called for the listening socket, so the second condition must be true. Usually, the AF_NETROM listener neither starts timers nor sets SOCK_DEAD. However, the condition is met if connect() fails before listen(). connect() starts the t1 timer and heartbeat timer, and t1timer calls nr_disconnect() when timeout happens. Then, SOCK_DEAD is set, and if we call listen(), the heartbeat timer calls nr_destroy_socket(). nr_connect nr_establish_data_link(sk) nr_start_t1timer(sk) nr_start_heartbeat(sk) nr_t1timer_expiry nr_disconnect(sk, ETIMEDOUT) nr_sk(sk)->state = NR_STATE_0 sk->sk_state = TCP_CLOSE sock_set_flag(sk, SOCK_DEAD) nr_listen if (sk->sk_state != TCP_LISTEN) sk->sk_state = TCP_LISTEN nr_heartbeat_expiry switch (nr->state) case NR_STATE_0 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD)) nr_destroy_socket(sk) This path seems expected, and nr_destroy_socket() is called to clean up resources. Initially, there was sock_hold() before nr_destroy_socket() so that the socket would not be freed, but the commit 517a16b1a88b ("netrom: Decrease sock refcount when sock timers expire") accidentally removed it. To fix use-after-free, let's add sock_hold(). [0]: BUG: KASAN: use-after-free in do_accept+0x483/0x510 net/socket.c:1848 Read of size 8 at addr ffff88807978d398 by task syz-executor.3/5315 CPU: 0 PID: 5315 Comm: syz-executor.3 Not tainted 6.2.0-rc3-syzkaller-00165-gd9fc1511728c #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x461 mm/kasan/report.c:417 kasan_report+0xbf/0x1f0 mm/kasan/report.c:517 do_accept+0x483/0x510 net/socket.c:1848 __sys_accept4_file net/socket.c:1897 [inline] __sys_accept4+0x9a/0x120 net/socket.c:1927 __do_sys_accept net/socket.c:1944 [inline] __se_sys_accept net/socket.c:1941 [inline] __x64_sys_accept+0x75/0xb0 net/socket.c:1941 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fa436a8c0c9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fa437784168 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 00007fa436bac050 RCX: 00007fa436a8c0c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005 RBP: 00007fa436ae7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffebc6700df R14: 00007fa437784300 R15: 0000000000022000 Allocated by task 5294: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] ____kasan_kmalloc mm/kasan/common.c:330 [inline] __kasan_kmalloc+0xa3/0xb0 mm/kasan/common.c:380 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0xd0 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x140/0x290 net/core/sock.c:2038 sk_alloc+0x3a/0x7a0 net/core/sock.c:2091 nr_create+0xb6/0x5f0 net/netrom/af_netrom.c:433 __sock_create+0x359/0x790 net/socket.c:1515 sock_create net/socket.c:1566 [inline] __sys_socket_create net/socket.c:1603 [inline] __sys_socket_create net/socket.c:1588 [inline] __sys_socket+0x133/0x250 net/socket.c:1636 __do_sys_socket net/socket.c:1649 [inline] __se_sys_socket net/socket.c:1647 [inline] __x64_sys_socket+0x73/0xb0 net/socket.c:1647 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 14: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:518 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x13b/0x1a0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] __cache_free mm/slab.c:3394 [inline] __do_kmem_cache_free mm/slab.c:3580 [inline] __kmem_cache_free+0xcd/0x3b0 mm/slab.c:3587 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x5df/0x750 net/core/sock.c:2166 sk_destruct net/core/sock.c:2181 [inline] __sk_free+0x175/0x460 net/core/sock.c:2192 sk_free+0x7c/0xa0 net/core/sock.c:2203 sock_put include/net/sock.h:1991 [inline] nr_heartbeat_expiry+0x1d7/0x460 net/netrom/nr_timer.c:148 call_timer_fn+0x1da/0x7c0 kernel/time/timer.c:1700 expire_timers+0x2c6/0x5c0 kernel/time/timer.c:1751 __run_timers kernel/time/timer.c:2022 [inline] __run_timers kernel/time/timer.c:1995 [inline] run_timer_softirq+0x326/0x910 kernel/time/timer.c:2035 __do_softirq+0x1fb/0xadc kernel/softirq.c:571 Fixes: 517a16b1a88b ("netrom: Decrease sock refcount when sock timers expire") Reported-by: syzbot+5fafd5cfe1fc91f6b352@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230120231927.51711-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_timer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index a8da88db7893..4e7c968cde2d 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; From cf9a2ce0383e2cb182d223d2ece17f964432367e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2023 08:45:52 +0000 Subject: [PATCH 127/143] net/sched: sch_taprio: do not schedule in taprio_reset() [ Upstream commit ea4fdbaa2f7798cb25adbe4fd52ffc6356f097bb ] As reported by syzbot and hinted by Vinicius, I should not have added a qdisc_synchronize() call in taprio_reset() taprio_reset() can be called with qdisc spinlock held (and BH disabled) as shown in included syzbot report [1]. Only taprio_destroy() needed this synchronization, as explained in the blamed commit changelog. [1] BUG: scheduling while atomic: syz-executor150/5091/0x00000202 2 locks held by syz-executor150/5091: Modules linked in: Preemption disabled at: [<0000000000000000>] 0x0 Kernel panic - not syncing: scheduling while atomic: panic_on_warn set ... CPU: 1 PID: 5091 Comm: syz-executor150 Not tainted 6.2.0-rc3-syzkaller-00219-g010a74f52203 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 panic+0x2cc/0x626 kernel/panic.c:318 check_panic_on_warn.cold+0x19/0x35 kernel/panic.c:238 __schedule_bug.cold+0xd5/0xfe kernel/sched/core.c:5836 schedule_debug kernel/sched/core.c:5865 [inline] __schedule+0x34e4/0x5450 kernel/sched/core.c:6500 schedule+0xde/0x1b0 kernel/sched/core.c:6682 schedule_timeout+0x14e/0x2a0 kernel/time/timer.c:2167 schedule_timeout_uninterruptible kernel/time/timer.c:2201 [inline] msleep+0xb6/0x100 kernel/time/timer.c:2322 qdisc_synchronize include/net/sch_generic.h:1295 [inline] taprio_reset+0x93/0x270 net/sched/sch_taprio.c:1703 qdisc_reset+0x10c/0x770 net/sched/sch_generic.c:1022 dev_reset_queue+0x92/0x130 net/sched/sch_generic.c:1285 netdev_for_each_tx_queue include/linux/netdevice.h:2464 [inline] dev_deactivate_many+0x36d/0x9f0 net/sched/sch_generic.c:1351 dev_deactivate+0xed/0x1b0 net/sched/sch_generic.c:1374 qdisc_graft+0xe4a/0x1380 net/sched/sch_api.c:1080 tc_modify_qdisc+0xb6b/0x19a0 net/sched/sch_api.c:1689 rtnetlink_rcv_msg+0x43e/0xca0 net/core/rtnetlink.c:6141 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2564 netlink_unicast_kernel net/netlink/af_netlink.c:1330 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1356 netlink_sendmsg+0x91b/0xe10 net/netlink/af_netlink.c:1932 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 ____sys_sendmsg+0x712/0x8c0 net/socket.c:2476 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2530 __sys_sendmsg+0xf7/0x1c0 net/socket.c:2559 do_syscall_x64 arch/x86/entry/common.c:50 [inline] Fixes: 3a415d59c1db ("net/sched: sch_taprio: fix possible use-after-free") Link: https://lore.kernel.org/netdev/167387581653.2747.13878941339893288655.git-patchwork-notify@kernel.org/T/ Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Vinicius Costa Gomes Link: https://lore.kernel.org/r/20230123084552.574396-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/sch_taprio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 5411bb4cdfc8..e25fe44899ff 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1621,7 +1621,6 @@ static void taprio_reset(struct Qdisc *sch) int i; hrtimer_cancel(&q->advance_timer); - qdisc_synchronize(sch); if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues; i++) From 6ef652f35dcfaa1ab2b2cf6c1694718595148eee Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 23 Jan 2023 14:59:33 -0300 Subject: [PATCH 128/143] sctp: fail if no bound addresses can be used for a given scope [ Upstream commit 458e279f861d3f61796894cd158b780765a1569f ] Currently, if you bind the socket to something like: servaddr.sin6_family = AF_INET6; servaddr.sin6_port = htons(0); servaddr.sin6_scope_id = 0; inet_pton(AF_INET6, "::1", &servaddr.sin6_addr); And then request a connect to: connaddr.sin6_family = AF_INET6; connaddr.sin6_port = htons(20000); connaddr.sin6_scope_id = if_nametoindex("lo"); inet_pton(AF_INET6, "fe88::1", &connaddr.sin6_addr); What the stack does is: - bind the socket - create a new asoc - to handle the connect - copy the addresses that can be used for the given scope - try to connect But the copy returns 0 addresses, and the effect is that it ends up trying to connect as if the socket wasn't bound, which is not the desired behavior. This unexpected behavior also allows KASLR leaks through SCTP diag interface. The fix here then is, if when trying to copy the addresses that can be used for the scope used in connect() it returns 0 addresses, bail out. This is what TCP does with a similar reproducer. Reported-by: Pietro Borrello Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Link: https://lore.kernel.org/r/9fcd182f1099f86c6661f3717f63712ddd1c676c.1674496737.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sctp/bind_addr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 59e653b528b1..6b95d3ba8fe1 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. + */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest); From 3af20f63212d04ad68e370ac17603829b3121362 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 24 Jan 2023 09:02:11 +0900 Subject: [PATCH 129/143] net: ravb: Fix possible hang if RIS2_QFF1 happen [ Upstream commit f3c07758c9007a6bfff5290d9e19d3c41930c897 ] Since this driver enables the interrupt by RIC2_QFE1, this driver should clear the interrupt flag if it happens. Otherwise, the interrupt causes to hang the system. Note that this also fix a minor coding style (a comment indentation) around the fixed code. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Sergey Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/renesas/ravb_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9ec6d63691aa..410ccd28f653 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -736,14 +736,14 @@ static void ravb_error_interrupt(struct net_device *ndev) ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); if (eis & EIS_QFS) { ris2 = ravb_read(ndev, RIS2); - ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), + ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED), RIS2); /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF0) priv->stats[RAVB_BE].rx_over_errors++; - /* Receive Descriptor Empty int */ + /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF1) priv->stats[RAVB_NC].rx_over_errors++; From e9c1b1e1a00b0c668212792d263951a7f3d1ee60 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 25 Jan 2023 13:17:42 +0100 Subject: [PATCH 130/143] thermal: intel: int340x: Add locking to int340x_thermal_get_trip_type() [ Upstream commit acd7e9ee57c880b99671dd99680cb707b7b5b0ee ] In order to prevent int340x_thermal_get_trip_type() from possibly racing with int340x_thermal_read_trips() invoked by int3403_notify() add locking to it in analogy with int340x_thermal_get_trip_temp(). Fixes: 6757a7abe47b ("thermal: intel: int340x: Protect trip temperature from concurrent updates") Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- .../intel/int340x_thermal/int340x_thermal_zone.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c index c9e0050bce17..6952f4e237e1 100644 --- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c @@ -81,11 +81,13 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, enum thermal_trip_type *type) { struct int34x_thermal_zone *d = zone->devdata; - int i; + int i, ret = 0; if (d->override_ops && d->override_ops->get_trip_type) return d->override_ops->get_trip_type(zone, trip, type); + mutex_lock(&d->trip_mutex); + if (trip < d->aux_trip_nr) *type = THERMAL_TRIP_PASSIVE; else if (trip == d->crt_trip_id) @@ -103,10 +105,12 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, } } if (i == INT340X_THERMAL_MAX_ACT_TRIP_COUNT) - return -EINVAL; + ret = -EINVAL; } - return 0; + mutex_unlock(&d->trip_mutex); + + return ret; } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, From 62a0806eb4d2318874563f79c8fdd6bfe34ceddd Mon Sep 17 00:00:00 2001 From: David Christensen Date: Tue, 24 Jan 2023 13:53:39 -0500 Subject: [PATCH 131/143] net/tg3: resolve deadlock in tg3_reset_task() during EEH [ Upstream commit 6c4ca03bd890566d873e3593b32d034bf2f5a087 ] During EEH error injection testing, a deadlock was encountered in the tg3 driver when tg3_io_error_detected() was attempting to cancel outstanding reset tasks: crash> foreach UN bt ... PID: 159 TASK: c0000000067c6000 CPU: 8 COMMAND: "eehd" ... #5 [c00000000681f990] __cancel_work_timer at c00000000019fd18 #6 [c00000000681fa30] tg3_io_error_detected at c00800000295f098 [tg3] #7 [c00000000681faf0] eeh_report_error at c00000000004e25c ... PID: 290 TASK: c000000036e5f800 CPU: 6 COMMAND: "kworker/6:1" ... #4 [c00000003721fbc0] rtnl_lock at c000000000c940d8 #5 [c00000003721fbe0] tg3_reset_task at c008000002969358 [tg3] #6 [c00000003721fc60] process_one_work at c00000000019e5c4 ... PID: 296 TASK: c000000037a65800 CPU: 21 COMMAND: "kworker/21:1" ... #4 [c000000037247bc0] rtnl_lock at c000000000c940d8 #5 [c000000037247be0] tg3_reset_task at c008000002969358 [tg3] #6 [c000000037247c60] process_one_work at c00000000019e5c4 ... PID: 655 TASK: c000000036f49000 CPU: 16 COMMAND: "kworker/16:2" ...:1 #4 [c0000000373ebbc0] rtnl_lock at c000000000c940d8 #5 [c0000000373ebbe0] tg3_reset_task at c008000002969358 [tg3] #6 [c0000000373ebc60] process_one_work at c00000000019e5c4 ... Code inspection shows that both tg3_io_error_detected() and tg3_reset_task() attempt to acquire the RTNL lock at the beginning of their code blocks. If tg3_reset_task() should happen to execute between the times when tg3_io_error_deteced() acquires the RTNL lock and tg3_reset_task_cancel() is called, a deadlock will occur. Moving tg3_reset_task_cancel() call earlier within the code block, prior to acquiring RTNL, prevents this from happening, but also exposes another deadlock issue where tg3_reset_task() may execute AFTER tg3_io_error_detected() has executed: crash> foreach UN bt PID: 159 TASK: c0000000067d2000 CPU: 9 COMMAND: "eehd" ... #4 [c000000006867a60] rtnl_lock at c000000000c940d8 #5 [c000000006867a80] tg3_io_slot_reset at c0080000026c2ea8 [tg3] #6 [c000000006867b00] eeh_report_reset at c00000000004de88 ... PID: 363 TASK: c000000037564000 CPU: 6 COMMAND: "kworker/6:1" ... #3 [c000000036c1bb70] msleep at c000000000259e6c #4 [c000000036c1bba0] napi_disable at c000000000c6b848 #5 [c000000036c1bbe0] tg3_reset_task at c0080000026d942c [tg3] #6 [c000000036c1bc60] process_one_work at c00000000019e5c4 ... This issue can be avoided by aborting tg3_reset_task() if EEH error recovery is already in progress. Fixes: db84bf43ef23 ("tg3: tg3_reset_task() needs to use rtnl_lock to synchronize") Signed-off-by: David Christensen Reviewed-by: Pavan Chebbi Link: https://lore.kernel.org/r/20230124185339.225806-1-drc@linux.vnet.ibm.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index be96116dc2cc..613ca6124e3c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11185,7 +11185,7 @@ static void tg3_reset_task(struct work_struct *work) rtnl_lock(); tg3_full_lock(tp, 0); - if (!netif_running(tp->dev)) { + if (tp->pcierr_recovery || !netif_running(tp->dev)) { tg3_flag_clear(tp, RESET_TASK_PENDING); tg3_full_unlock(tp); rtnl_unlock(); @@ -18179,6 +18179,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, netdev_info(netdev, "PCI I/O error detected\n"); + /* Want to make sure that the reset task doesn't run */ + tg3_reset_task_cancel(tp); + rtnl_lock(); /* Could be second call or maybe we don't have netdev yet */ @@ -18195,9 +18198,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, tg3_timer_stop(tp); - /* Want to make sure that the reset task doesn't run */ - tg3_reset_task_cancel(tp); - netif_device_detach(netdev); /* Clean up software state, even if MMIO is blocked */ From 7ff8128bb11651dde6cb45821bd2665790a173b6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 24 Jan 2023 11:11:57 +0100 Subject: [PATCH 132/143] net: mdio-mux-meson-g12a: force internal PHY off on mux switch [ Upstream commit 7083df59abbc2b7500db312cac706493be0273ff ] Force the internal PHY off then on when switching to the internal path. This fixes problems where the PHY ID is not properly set. Fixes: 7090425104db ("net: phy: add amlogic g12a mdio mux support") Suggested-by: Qi Duan Co-developed-by: Heiner Kallweit Signed-off-by: Heiner Kallweit Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20230124101157.232234-1-jbrunet@baylibre.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/mdio/mdio-mux-meson-g12a.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c index bf86c9c7a288..ab863530c9e8 100644 --- a/drivers/net/mdio/mdio-mux-meson-g12a.c +++ b/drivers/net/mdio/mdio-mux-meson-g12a.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -150,6 +151,7 @@ static const struct clk_ops g12a_ephy_pll_ops = { static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) { + u32 value; int ret; /* Enable the phy clock */ @@ -163,18 +165,25 @@ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) /* Initialize ephy control */ writel(EPHY_G12A_ID, priv->regs + ETH_PHY_CNTL0); - writel(FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | - FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | - FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | - PHY_CNTL1_CLK_EN | - PHY_CNTL1_CLKFREQ | - PHY_CNTL1_PHY_ENB, - priv->regs + ETH_PHY_CNTL1); + + /* Make sure we get a 0 -> 1 transition on the enable bit */ + value = FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | + FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | + FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | + PHY_CNTL1_CLK_EN | + PHY_CNTL1_CLKFREQ; + writel(value, priv->regs + ETH_PHY_CNTL1); writel(PHY_CNTL2_USE_INTERNAL | PHY_CNTL2_SMI_SRC_MAC | PHY_CNTL2_RX_CLK_EPHY, priv->regs + ETH_PHY_CNTL2); + value |= PHY_CNTL1_PHY_ENB; + writel(value, priv->regs + ETH_PHY_CNTL1); + + /* The phy needs a bit of time to power up */ + mdelay(10); + return 0; } From 230be65a18f8288585a8681e668a694b25ac1f67 Mon Sep 17 00:00:00 2001 From: Ivo Borisov Shopov Date: Thu, 26 Jan 2023 15:10:33 +0200 Subject: [PATCH 133/143] tools: gpio: fix -c option of gpio-event-mon [ Upstream commit 677d85e1a1ee69fa05ccea83847309484be3781c ] Following line should listen for a rising edge and exit after the first one since '-c 1' is provided. # gpio-event-mon -n gpiochip1 -o 0 -r -c 1 It works with kernel 4.19 but it doesn't work with 5.10. In 5.10 the above command doesn't exit after the first rising edge it keep listening for an event forever. The '-c 1' is not taken into an account. The problem is in commit 62757c32d5db ("tools: gpio: add multi-line monitoring to gpio-event-mon"). Before this commit the iterator 'i' in monitor_device() is used for counting of the events (loops). In the case of the above command (-c 1) we should start from 0 and increment 'i' only ones and hit the 'break' statement and exit the process. But after the above commit counting doesn't start from 0, it start from 1 when we listen on one line. It is because 'i' is used from one more purpose, counting of lines (num_lines) and it isn't restore to 0 after following code for (i = 0; i < num_lines; i++) gpiotools_set_bit(&values.mask, i); Restore the initial value of the iterator to 0 in order to allow counting of loops to work for any cases. Fixes: 62757c32d5db ("tools: gpio: add multi-line monitoring to gpio-event-mon") Signed-off-by: Ivo Borisov Shopov Reviewed-by: Andy Shevchenko [Bartosz: tweak the commit message] Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- tools/gpio/gpio-event-mon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 84ae1039b0a8..367c10636890 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -86,6 +86,7 @@ int monitor_device(const char *device_name, gpiotools_test_bit(values.bits, i)); } + i = 0; while (1) { struct gpio_v2_line_event event; From 9f3dd454fea589adf8cf2ae842e441e7fb4be608 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 16 Dec 2022 13:15:34 -0800 Subject: [PATCH 134/143] Revert "Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode" commit 3c44e2b6cde674797b76e76d3a903a63ce8a18bb upstream. This reverts commit ac5408991ea6b06e29129b4d4861097c4c3e0d59 because it causes loss of keyboard on HP 15-da1xxx. Fixes: ac5408991ea6 ("Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode") Reported-by: Jiri Slaby Link: https://lore.kernel.org/r/824effa5-8b9a-c28a-82bb-9b0ab24623e1@kernel.org Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1206358 Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/synaptics.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index f1013b950d57..82577095e175 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -191,7 +191,6 @@ static const char * const smbus_pnp_ids[] = { "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ "SYN3257", /* HP Envy 13-ad105ng */ - "SYN3286", /* HP Laptop 15-da3001TU */ NULL }; From 8fe3e574b3ac426ff4a11299f098e1c91538f3e5 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 19 Aug 2022 22:09:28 +0200 Subject: [PATCH 135/143] nouveau: explicitly wait on the fence in nouveau_bo_move_m2mf commit 6b04ce966a738ecdd9294c9593e48513c0dc90aa upstream. It is a bit unlcear to us why that's helping, but it does and unbreaks suspend/resume on a lot of GPUs without any known drawbacks. Cc: stable@vger.kernel.org # v5.15+ Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/156 Signed-off-by: Karol Herbst Reviewed-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20220819200928.401416-1-kherbst@redhat.com Cc: Salvatore Bonaccorso Cc: Computer Enthusiastic Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b57dcad8865f..7633f56bc0a4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -823,6 +823,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr, if (ret == 0) { ret = nouveau_fence_new(chan, false, &fence); if (ret == 0) { + /* TODO: figure out a better solution here + * + * wait on the fence here explicitly as going through + * ttm_bo_move_accel_cleanup somehow doesn't seem to do it. + * + * Without this the operation can timeout and we'll fallback to a + * software copy, which might take several minutes to finish. + */ + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, From 032a7d5ff519888f31ea91e24623f7e26befe641 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 13 Mar 2021 16:08:47 -0500 Subject: [PATCH 136/143] nfsd: Ensure knfsd shuts down when the "nfsd" pseudofs is unmounted commit c6c7f2a84da459bcc3714044e74a9cb66de31039 upstream. In order to ensure that knfsd threads don't linger once the nfsd pseudofs is unmounted (e.g. when the container is killed) we let nfsd_umount() shut down those threads and wait for them to exit. This also should ensure that we don't need to do a kernel mount of the pseudofs, since the thread lifetime is now limited by the lifetime of the filesystem. Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever Signed-off-by: Nikos Tsironis Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/netns.h | 6 +++--- fs/nfsd/nfs4state.c | 8 +------- fs/nfsd/nfsctl.c | 14 ++------------ fs/nfsd/nfsd.h | 3 +-- fs/nfsd/nfssvc.c | 35 ++++++++++++++++++++++++++++++++++- 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 7346acda9d76..02d3d2f0e616 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -42,9 +42,6 @@ struct nfsd_net { bool grace_ended; time64_t boot_time; - /* internal mount of the "nfsd" pseudofilesystem: */ - struct vfsmount *nfsd_mnt; - struct dentry *nfsd_client_dir; /* @@ -121,6 +118,9 @@ struct nfsd_net { wait_queue_head_t ntf_wq; atomic_t ntf_refcnt; + /* Allow umount to wait for nfsd state cleanup */ + struct completion nfsd_shutdown_complete; + /* * clientid and stateid data for construction of net unique COPY * stateids. diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9a47cc66963f..cb13a1649632 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -7394,14 +7394,9 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - ret = get_nfsdfs(net); + ret = nfs4_state_create_net(net); if (ret) return ret; - ret = nfs4_state_create_net(net); - if (ret) { - mntput(nn->nfsd_mnt); - return ret; - } locks_start_grace(net, &nn->nfsd4_manager); nfsd4_client_tracking_init(net); if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0) @@ -7471,7 +7466,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - mntput(nn->nfsd_mnt); } void diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index dedec4771ecc..c4b11560ac1b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1417,6 +1417,8 @@ static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; + nfsd_shutdown_threads(net); + kill_litter_super(sb); put_net(net); } @@ -1429,18 +1431,6 @@ static struct file_system_type nfsd_fs_type = { }; MODULE_ALIAS_FS("nfsd"); -int get_nfsdfs(struct net *net) -{ - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct vfsmount *mnt; - - mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - nn->nfsd_mnt = mnt; - return 0; -} - #ifdef CONFIG_PROC_FS static int create_proc_exports_entry(void) { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index cb742e17e04a..4362d295ed34 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -85,13 +85,12 @@ int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); +void nfsd_shutdown_threads(struct net *net); void nfsd_destroy(struct net *net); bool i_am_nfsd(void); -int get_nfsdfs(struct net *); - struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7fffe1453bd..2e61a565cdbd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -600,6 +600,37 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +static void nfsd_complete_shutdown(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + + nn->nfsd_serv = NULL; + complete(&nn->nfsd_shutdown_complete); +} + +void nfsd_shutdown_threads(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; + + mutex_lock(&nfsd_mutex); + serv = nn->nfsd_serv; + if (serv == NULL) { + mutex_unlock(&nfsd_mutex); + return; + } + + svc_get(serv); + /* Kill outstanding nfsd threads */ + serv->sv_ops->svo_setup(serv, NULL, 0); + nfsd_destroy(net); + mutex_unlock(&nfsd_mutex); + /* Wait for shutdown of nfsd_serv to complete */ + wait_for_completion(&nn->nfsd_shutdown_complete); +} + bool i_am_nfsd(void) { return kthread_func(current) == nfsd; @@ -622,11 +653,13 @@ int nfsd_create_serv(struct net *net) &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; + init_completion(&nn->nfsd_shutdown_complete); nn->nfsd_serv->sv_maxconn = nn->max_connections; error = svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); + nfsd_complete_shutdown(net); return error; } @@ -675,7 +708,7 @@ void nfsd_destroy(struct net *net) svc_shutdown_net(nn->nfsd_serv, net); svc_destroy(nn->nfsd_serv); if (destroy) - nn->nfsd_serv = NULL; + nfsd_complete_shutdown(net); } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) From 7fe4fab870613f912662efd9cc88fa588e8d5696 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Tue, 17 Jan 2023 20:47:09 +0800 Subject: [PATCH 137/143] Revert "selftests/ftrace: Update synthetic event syntax errors" This reverts commit 31c2e369b5335d70e913afee3ae11e54d61afef2 which is commit b5734e997e1117afb479ffda500e36fa91aea3e8 upstream. The reverted commit belongs to patchset which updated synthetic event command parsing and testcase 'trigger-synthetic_event_syntax_errors.tc' Link: https://lore.kernel.org/all/20210211020950.102294806@goodmis.org/ However this testcase update was backported alone without feature update, which makes the testcase cannot pass on stable branch. Revert this commit to make the testcase correct. Fixes: 31c2e369b533 ("selftests/ftrace: Update synthetic event syntax errors") Reported-by: Chen Zhongjin Signed-off-by: Zheng Yejian Signed-off-by: Greg Kroah-Hartman --- .../trigger-synthetic_event_syntax_errors.tc | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc index 955e3ceea44b..ada594fe16cb 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -1,38 +1,19 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser errors -# requires: synthetic_events error_log "char name[]' >> synthetic_events":README +# requires: synthetic_events error_log check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' } -check_dyn_error() { # command-with-error-pos-by-^ - ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events' -} - check_error 'myevent ^chr arg' # INVALID_TYPE -check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE - -check_error 'myevent char ^str]; int v' # BAD_NAME -check_error '^mye-vent char str[]' # BAD_NAME -check_error 'myevent char ^st-r[]' # BAD_NAME - -check_error 'myevent char str;^[]' # INVALID_FIELD -check_error 'myevent char str; ^int' # INVALID_FIELD - -check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC -check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC -check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC - -check_error '^mye;vent char str[]' # INVALID_CMD -check_error '^myevent ; char str[]' # INVALID_CMD -check_error '^myevent; char str[]' # INVALID_CMD -check_error '^myevent ;char str[]' # INVALID_CMD -check_error '^; char str[]' # INVALID_CMD -check_error '^;myevent char str[]' # INVALID_CMD -check_error '^myevent' # INVALID_CMD - -check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD +check_error 'myevent ^char str[];; int v' # INVALID_TYPE +check_error 'myevent char ^str]; int v' # INVALID_NAME +check_error 'myevent char ^str;[]' # INVALID_NAME +check_error 'myevent ^char str[; int v' # INVALID_TYPE +check_error '^mye;vent char str[]' # BAD_NAME +check_error 'myevent char str[]; ^int' # INVALID_FIELD +check_error '^myevent' # INCOMPLETE_CMD exit 0 From 2eca102b350f5dd1870e615fb660dc0cc498b4db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Mar 2022 19:00:56 +0100 Subject: [PATCH 138/143] block: fix and cleanup bio_check_ro commit 57e95e4670d1126c103305bcf34a9442f49f6d6a upstream. Don't use a WARN_ON when printing a potentially user triggered condition. Also don't print the partno when the block device name already includes it, and use the %pg specifier to simplify printing the block device name. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220304180105.409765-2-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Fedor Pchelkin Signed-off-by: Greg Kroah-Hartman --- block/blk-core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 26664f2a139e..9afb79b322fb 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -700,9 +700,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) return false; - - WARN_ONCE(1, - "Trying to write to read-only block-device %s (partno %d)\n", + pr_warn("Trying to write to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); /* Older lvm-tools actually trigger this */ return false; From e284c273dbb4c1ed68d4204bff94d0b10e4a90f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 22:57:13 +0100 Subject: [PATCH 139/143] x86/i8259: Mark legacy PIC interrupts with IRQ_LEVEL commit 5fa55950729d0762a787451dc52862c3f850f859 upstream. Baoquan reported that after triggering a crash the subsequent crash-kernel fails to boot about half of the time. It triggers a NULL pointer dereference in the periodic tick code. This happens because the legacy timer interrupt (IRQ0) is resent in software which happens in soft interrupt (tasklet) context. In this context get_irq_regs() returns NULL which leads to the NULL pointer dereference. The reason for the resend is a spurious APIC interrupt on the IRQ0 vector which is captured and leads to a resend when the legacy timer interrupt is enabled. This is wrong because the legacy PIC interrupts are level triggered and therefore should never be resent in software, but nothing ever sets the IRQ_LEVEL flag on those interrupts, so the core code does not know about their trigger type. Ensure that IRQ_LEVEL is set when the legacy PCI interrupts are set up. Fixes: a4633adcdbc1 ("[PATCH] genirq: add genirq sw IRQ-retrigger") Reported-by: Baoquan He Signed-off-by: Thomas Gleixner Tested-by: Baoquan He Link: https://lore.kernel.org/r/87mt6rjrra.ffs@tglx Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/i8259.c | 1 + arch/x86/kernel/irqinit.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 282b4ee1339f..f325389d0351 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -114,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) disable_irq_nosync(irq); io_apic_irqs &= ~(1<init(0); - for (i = 0; i < nr_legacy_irqs(); i++) + for (i = 0; i < nr_legacy_irqs(); i++) { irq_set_chip_and_handler(i, chip, handle_level_irq); + irq_set_status_flags(i, IRQ_LEVEL); + } } void __init init_IRQ(void) From 743435cd1705b4a3a4b8e73a538c4c1a1efc7edb Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Tue, 24 Jan 2023 02:47:21 +0100 Subject: [PATCH 140/143] netfilter: conntrack: unify established states for SCTP paths commit a44b7651489f26271ac784b70895e8a85d0cebf4 upstream. An SCTP endpoint can start an association through a path and tear it down over another one. That means the initial path will not see the shutdown sequence, and the conntrack entry will remain in ESTABLISHED state for 5 days. By merging the HEARTBEAT_ACKED and ESTABLISHED states into one ESTABLISHED state, there remains no difference between a primary or secondary path. The timeout for the merged ESTABLISHED state is set to 210 seconds (hb_interval * max_path_retrans + rto_max). So, even if a path doesn't see the shutdown sequence, it will expire in a reasonable amount of time. With this change in place, there is now more than one state from which we can transition to ESTABLISHED, COOKIE_ECHOED and HEARTBEAT_SENT, so handle the setting of ASSURED bit whenever a state change has happened and the new state is ESTABLISHED. Removed the check for dir==REPLY since the transition to ESTABLISHED can happen only in the reply direction. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- .../uapi/linux/netfilter/nf_conntrack_sctp.h | 2 +- .../linux/netfilter/nfnetlink_cttimeout.h | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 93 ++++++++----------- net/netfilter/nf_conntrack_standalone.c | 8 -- 4 files changed, 41 insertions(+), 64 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index edc6ddab0de6..2d6f80d75ae7 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -15,7 +15,7 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_RECD, SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_HEARTBEAT_SENT, - SCTP_CONNTRACK_HEARTBEAT_ACKED, + SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */ SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 6b20fb22717b..aa805e6d4e28 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -94,7 +94,7 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, - CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */ __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 72d0aa603cd6..cec4b16170a0 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -27,22 +27,16 @@ #include #include -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR - - And so for me for SCTP :D -Kiran */ - static const char *const sctp_conntrack_names[] = { - "NONE", - "CLOSED", - "COOKIE_WAIT", - "COOKIE_ECHOED", - "ESTABLISHED", - "SHUTDOWN_SENT", - "SHUTDOWN_RECD", - "SHUTDOWN_ACK_SENT", - "HEARTBEAT_SENT", - "HEARTBEAT_ACKED", + [SCTP_CONNTRACK_NONE] = "NONE", + [SCTP_CONNTRACK_CLOSED] = "CLOSED", + [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", + [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", + [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", + [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", + [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", + [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; #define SECS * HZ @@ -54,12 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = 10 SECS, [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, - [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, + [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, - [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 @@ -73,7 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT -#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED #define sIV SCTP_CONNTRACK_MAX /* @@ -96,9 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. -HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to - that of the HEARTBEAT chunk. Secondary connection is - established. */ /* TODO @@ -115,33 +104,33 @@ cookie echoed to closed. static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, -/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, -/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, } }; @@ -508,8 +497,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } ct->proto.sctp.state = new_state; - if (old_state != new_state) + if (old_state != new_state) { nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + if (new_state == SCTP_CONNTRACK_ESTABLISHED && + !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); + } } spin_unlock_bh(&ct->lock); @@ -523,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && - dir == IP_CT_DIR_REPLY && - new_state == SCTP_CONNTRACK_ESTABLISHED) { - pr_debug("Setting assured bit\n"); - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } - return NF_ACCEPT; out_unlock: diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a7f88cdf3f87..e12b52019a55 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -583,7 +583,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, @@ -853,12 +852,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { @@ -987,7 +980,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, XASSIGN(SHUTDOWN_RECD, sn); XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); - XASSIGN(HEARTBEAT_ACKED, sn); #undef XASSIGN #endif } From f84c9b72fb200633774704d8020f769c88a4b249 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Dec 2022 13:51:49 +0000 Subject: [PATCH 141/143] perf/x86/amd: fix potential integer overflow on shift of a int commit 08245672cdc6505550d1a5020603b0a8d4a6dcc7 upstream. The left shift of int 32 bit integer constant 1 is evaluated using 32 bit arithmetic and then passed as a 64 bit function argument. In the case where i is 32 or more this can lead to an overflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: 471af006a747 ("perf/x86/amd: Constrain Large Increment per Cycle events") Signed-off-by: Colin Ian King Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ian Rogers Acked-by: Kim Phillips Link: https://lore.kernel.org/r/20221202135149.1797974-1-colin.i.king@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/amd/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 39eb276d0277..52eba415928a 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -976,7 +976,7 @@ static int __init amd_core_pmu_init(void) * numbered counter following it. */ for (i = 0; i < x86_pmu.num_counters - 1; i += 2) - even_ctr_mask |= 1 << i; + even_ctr_mask |= BIT_ULL(i); pair_constraint = (struct event_constraint) __EVENT_CONSTRAINT(0, even_ctr_mask, 0, From 19f1f99be3716cc08071e25ba159813318ab0d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 20 Jun 2022 19:18:15 +0200 Subject: [PATCH 142/143] clk: Fix pointer casting to prevent oops in devm_clk_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8b3d743fc9e2542822826890b482afabf0e7522a upstream. The release function is called with a pointer to the memory returned by devres_alloc(). I was confused about that by the code before the generalization that used a struct clk **ptr. Reported-by: Marek Szyprowski Fixes: abae8e57e49a ("clk: generalize devm_clk_get() a bit") Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20220620171815.114212-1-u.kleine-koenig@pengutronix.de Tested-by: Marek Szyprowski Tested-by: Linux Kernel Functional Testing Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk-devres.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index 43ccd20e0298..4fb4fd4b06bd 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -11,7 +11,7 @@ struct devm_clk_state { static void devm_clk_release(struct device *dev, void *res) { - struct devm_clk_state *state = *(struct devm_clk_state **)res; + struct devm_clk_state *state = res; if (state->exit) state->exit(state->clk); From 8d823aaa220eebec88c9f307225d3e163252ea95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Feb 2023 08:23:27 +0100 Subject: [PATCH 143/143] Linux 5.10.166 Link: https://lore.kernel.org/r/20230130134306.862721518@linuxfoundation.org Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Pavel Machek (CIP) Tested-by: Guenter Roeck Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Salvatore Bonaccorso Tested-by: Jon Hunter Tested-by: Allen Pais Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5fbff8603f44..efdfb40a82ab 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 165 +SUBLEVEL = 166 EXTRAVERSION = NAME = Dare mighty things