From 1d0fe3fb5d4be366c5ba236c8ad230768e1a5c9f Mon Sep 17 00:00:00 2001 From: Tsuchiya Yuto Date: Wed, 1 Dec 2021 15:19:04 +0100 Subject: [PATCH 001/509] media: atomisp: fix "variable dereferenced before check 'asd'" commit ac56760a8bbb4e654b2fd54e5de79dd5d72f937d upstream. There are two occurrences where the variable 'asd' is dereferenced before check. Fix this issue by using the variable after the check. Link: https://lore.kernel.org/linux-media/20211122074122.GA6581@kili/ Link: https://lore.kernel.org/linux-media/20211201141904.47231-1-kitakar@gmail.com Reported-by: Dan Carpenter Signed-off-by: Tsuchiya Yuto Signed-off-by: Mauro Carvalho Chehab Igned-off-by: Anastasia Belova Signed-off-by: Greg Kroah-Hartman --- drivers/staging/media/atomisp/pci/atomisp_cmd.c | 3 ++- drivers/staging/media/atomisp/pci/atomisp_ioctl.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c index 20c19e08968e..613bd9620224 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c @@ -5243,7 +5243,7 @@ static int atomisp_set_fmt_to_isp(struct video_device *vdev, int (*configure_pp_input)(struct atomisp_sub_device *asd, unsigned int width, unsigned int height) = configure_pp_input_nop; - u16 stream_index = atomisp_source_pad_to_stream_id(asd, source_pad); + u16 stream_index; const struct atomisp_in_fmt_conv *fc; int ret, i; @@ -5252,6 +5252,7 @@ static int atomisp_set_fmt_to_isp(struct video_device *vdev, __func__, vdev->name); return -EINVAL; } + stream_index = atomisp_source_pad_to_stream_id(asd, source_pad); v4l2_fh_init(&fh.vfh, vdev); diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c index 8a0648fd7c81..4615e4cae718 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c +++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c @@ -1123,7 +1123,7 @@ int __atomisp_reqbufs(struct file *file, void *fh, struct ia_css_frame *frame; struct videobuf_vmalloc_memory *vm_mem; u16 source_pad = atomisp_subdev_source_pad(vdev); - u16 stream_id = atomisp_source_pad_to_stream_id(asd, source_pad); + u16 stream_id; int ret = 0, i = 0; if (!asd) { @@ -1131,6 +1131,7 @@ int __atomisp_reqbufs(struct file *file, void *fh, __func__, vdev->name); return -EINVAL; } + stream_id = atomisp_source_pad_to_stream_id(asd, source_pad); if (req->count == 0) { mutex_lock(&pipe->capq.vb_lock); From 8667f7113107c8fab07ddd088e16c6a10d6d395f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Jun 2023 22:33:55 +0200 Subject: [PATCH 002/509] x86/smp: Use dedicated cache-line for mwait_play_dead() commit f9c9987bf52f4e42e940ae217333ebb5a4c3b506 upstream. Monitoring idletask::thread_info::flags in mwait_play_dead() has been an obvious choice as all what is needed is a cache line which is not written by other CPUs. But there is a use case where a "dead" CPU needs to be brought out of MWAIT: kexec(). This is required as kexec() can overwrite text, pagetables, stacks and the monitored cacheline of the original kernel. The latter causes MWAIT to resume execution which obviously causes havoc on the kexec kernel which results usually in triple faults. Use a dedicated per CPU storage to prepare for that. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230615193330.434553750@linutronix.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/smpboot.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bda89ecc7799..d2403da17842 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -100,6 +100,17 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +struct mwait_cpu_dead { + unsigned int control; + unsigned int status; +}; + +/* + * Cache line aligned data for mwait_play_dead(). Separate on purpose so + * that it's unlikely to be touched by other CPUs. + */ +static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); + /* Logical package management. We might want to allocate that dynamically */ unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); @@ -1674,10 +1685,10 @@ EXPORT_SYMBOL_GPL(cond_wakeup_cpu0); */ static inline void mwait_play_dead(void) { + struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - void *mwait_ptr; int i; if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || @@ -1712,13 +1723,6 @@ static inline void mwait_play_dead(void) (highest_subcstate - 1); } - /* - * This should be a memory location in a cache line which is - * unlikely to be touched by other processors. The actual - * content is immaterial as it is not actually modified in any way. - */ - mwait_ptr = ¤t_thread_info()->flags; - wbinvd(); while (1) { @@ -1730,9 +1734,9 @@ static inline void mwait_play_dead(void) * case where we return around the loop. */ mb(); - clflush(mwait_ptr); + clflush(md); mb(); - __monitor(mwait_ptr, 0, 0); + __monitor(md, 0, 0); mb(); __mwait(eax, 0); From 02fbf62df99f208454097404567da5335bb5b55f Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 7 Jun 2023 09:27:08 +0200 Subject: [PATCH 003/509] can: isotp: isotp_sendmsg(): fix return error fix on TX path commit e38910c0072b541a91954682c8b074a93e57c09b upstream. With commit d674a8f123b4 ("can: isotp: isotp_sendmsg(): fix return error on FC timeout on TX path") the missing correct return value in the case of a protocol error was introduced. But the way the error value has been read and sent to the user space does not follow the common scheme to clear the error after reading which is provided by the sock_error() function. This leads to an error report at the following write() attempt although everything should be working. Fixes: d674a8f123b4 ("can: isotp: isotp_sendmsg(): fix return error on FC timeout on TX path") Reported-by: Carsten Schmidt Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230607072708.38809-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/isotp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 4fcd8162fad4..16ebc187af1c 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -990,8 +990,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); - if (sk->sk_err) - return -sk->sk_err; + err = sock_error(sk); + if (err) + return err; } return size; From 5b813734a0d221189f1430f89edff4c275f30167 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 3 May 2021 13:57:34 +0200 Subject: [PATCH 004/509] video: imsttfb: check for ioremap() failures commit 13b7c0390a5d3840e1e2cda8f44a310fdbb982de upstream. We should check if ioremap() were to somehow fail in imsttfb_probe() and handle the unwinding of the resources allocated here properly. Ideally if anyone cares about this driver (it's for a PowerMac era PCI display card), they wouldn't even be using fbdev anymore. Or the devm_* apis could be used, but that's just extra work for diminishing returns... Cc: Finn Thain Cc: Bartlomiej Zolnierkiewicz Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20210503115736.2104747-68-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/imsttfb.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index e04411701ec8..16f272a50811 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1469,6 +1469,7 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct imstt_par *par; struct fb_info *info; struct device_node *dp; + int ret = -ENOMEM; dp = pci_device_to_OF_node(pdev); if(dp) @@ -1504,23 +1505,37 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) default: printk(KERN_INFO "imsttfb: Device 0x%x unknown, " "contact maintainer.\n", pdev->device); - release_mem_region(addr, size); - framebuffer_release(info); - return -ENODEV; + ret = -ENODEV; + goto error; } info->fix.smem_start = addr; info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ? 0x400000 : 0x800000); + if (!info->screen_base) + goto error; info->fix.mmio_start = addr + 0x800000; par->dc_regs = ioremap(addr + 0x800000, 0x1000); + if (!par->dc_regs) + goto error; par->cmap_regs_phys = addr + 0x840000; par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000); + if (!par->cmap_regs) + goto error; info->pseudo_palette = par->palette; init_imstt(info); pci_set_drvdata(pdev, info); return 0; + +error: + if (par->dc_regs) + iounmap(par->dc_regs); + if (info->screen_base) + iounmap(info->screen_base); + release_mem_region(addr, size); + framebuffer_release(info); + return ret; } static void imsttfb_remove(struct pci_dev *pdev) From 2bf70b88cc358a437db376826f92c8dcf9c23587 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 27 Apr 2023 11:08:41 +0800 Subject: [PATCH 005/509] fbdev: imsttfb: Fix use after free bug in imsttfb_probe commit c75f5a55061091030a13fef71b9995b89bc86213 upstream. A use-after-free bug may occur if init_imstt invokes framebuffer_release and free the info ptr. The caller, imsttfb_probe didn't notice that and still keep the ptr as private data in pdev. If we remove the driver which will call imsttfb_remove to make cleanup, UAF happens. Fix it by return error code if bad case happens in init_imstt. Signed-off-by: Zheng Wang Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/imsttfb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c index 16f272a50811..1b2fb8ed7623 100644 --- a/drivers/video/fbdev/imsttfb.c +++ b/drivers/video/fbdev/imsttfb.c @@ -1346,7 +1346,7 @@ static const struct fb_ops imsttfb_ops = { .fb_ioctl = imsttfb_ioctl, }; -static void init_imstt(struct fb_info *info) +static int init_imstt(struct fb_info *info) { struct imstt_par *par = info->par; __u32 i, tmp, *ip, *end; @@ -1419,7 +1419,7 @@ static void init_imstt(struct fb_info *info) || !(compute_imstt_regvals(par, info->var.xres, info->var.yres))) { printk("imsttfb: %ux%ux%u not supported\n", info->var.xres, info->var.yres, info->var.bits_per_pixel); framebuffer_release(info); - return; + return -ENODEV; } sprintf(info->fix.id, "IMS TT (%s)", par->ramdac == IBM ? "IBM" : "TVP"); @@ -1455,12 +1455,13 @@ static void init_imstt(struct fb_info *info) if (register_framebuffer(info) < 0) { framebuffer_release(info); - return; + return -ENODEV; } tmp = (read_reg_le32(par->dc_regs, SSTATUS) & 0x0f00) >> 8; fb_info(info, "%s frame buffer; %uMB vram; chip version %u\n", info->fix.id, info->fix.smem_len >> 20, tmp); + return 0; } static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -1523,10 +1524,10 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!par->cmap_regs) goto error; info->pseudo_palette = par->palette; - init_imstt(info); - - pci_set_drvdata(pdev, info); - return 0; + ret = init_imstt(info); + if (!ret) + pci_set_drvdata(pdev, info); + return ret; error: if (par->dc_regs) From 9598a647ecc8f300b0540abf9d3b3439859d163b Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 8 Jun 2023 14:38:28 -0700 Subject: [PATCH 006/509] HID: wacom: Use ktime_t rather than int when dealing with timestamps commit 9a6c0e28e215535b2938c61ded54603b4e5814c5 upstream. Code which interacts with timestamps needs to use the ktime_t type returned by functions like ktime_get. The int type does not offer enough space to store these values, and attempting to use it is a recipe for problems. In this particular case, overflows would occur when calculating/storing timestamps leading to incorrect values being reported to userspace. In some cases these bad timestamps cause input handling in userspace to appear hung. Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/901 Fixes: 17d793f3ed53 ("HID: wacom: insert timestamp to packed Bluetooth (BT) events") CC: stable@vger.kernel.org Signed-off-by: Jason Gerecke Reviewed-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20230608213828.2108-1-jason.gerecke@wacom.com Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/wacom_wac.c | 6 +++--- drivers/hid/wacom_wac.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1bfcc94c1d23..de6fe9866870 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1307,7 +1307,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) struct input_dev *pen_input = wacom->pen_input; unsigned char *data = wacom->data; int number_of_valid_frames = 0; - int time_interval = 15000000; + ktime_t time_interval = 15000000; ktime_t time_packet_received = ktime_get(); int i; @@ -1341,7 +1341,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) if (number_of_valid_frames) { if (wacom->hid_data.time_delayed) time_interval = ktime_get() - wacom->hid_data.time_delayed; - time_interval /= number_of_valid_frames; + time_interval = div_u64(time_interval, number_of_valid_frames); wacom->hid_data.time_delayed = time_packet_received; } @@ -1352,7 +1352,7 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom) bool range = frame[0] & 0x20; bool invert = frame[0] & 0x10; int frames_number_reversed = number_of_valid_frames - i - 1; - int event_timestamp = time_packet_received - frames_number_reversed * time_interval; + ktime_t event_timestamp = time_packet_received - frames_number_reversed * time_interval; if (!valid) continue; diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index 88badfbae999..166731292c35 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -320,7 +320,7 @@ struct hid_data { int bat_connected; int ps_connected; bool pad_input_event_flag; - int time_delayed; + ktime_t time_delayed; }; struct wacom_remote_data { From 02a4c4e225f4185fdc065c43167892136c734d11 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Sun, 18 Jun 2023 08:09:57 +0900 Subject: [PATCH 007/509] HID: logitech-hidpp: add HIDPP_QUIRK_DELAYED_INIT for the T651. commit 5fe251112646d8626818ea90f7af325bab243efa upstream. commit 498ba2069035 ("HID: logitech-hidpp: Don't restart communication if not necessary") put restarting communication behind that flag, and this was apparently necessary on the T651, but the flag was not set for it. Fixes: 498ba2069035 ("HID: logitech-hidpp: Don't restart communication if not necessary") Cc: stable@vger.kernel.org Signed-off-by: Mike Hommey Link: https://lore.kernel.org/r/20230617230957.6mx73th4blv7owqk@glandium.org Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-logitech-hidpp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 2e32a21bbcbf..6ac0c2d9a147 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4009,7 +4009,7 @@ static const struct hid_device_id hidpp_devices[] = { { /* wireless touchpad T651 */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_T651), - .driver_data = HIDPP_QUIRK_CLASS_WTP }, + .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT }, { /* Mouse Logitech Anywhere MX */ LDJ_DEVICE(0x1017), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 }, { /* Mouse Logitech Cube */ From 649104c834baf7f189bfdc7757754a0d88794bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Thu, 25 May 2023 14:18:11 +0200 Subject: [PATCH 008/509] Revert "thermal/drivers/mediatek: Use devm_of_iomap to avoid resource leak in mtk_thermal_probe" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 86edac7d3888c715fe3a81bd61f3617ecfe2e1dd upstream. This reverts commit f05c7b7d9ea9477fcc388476c6f4ade8c66d2d26. That change was causing a regression in the generic-adc-thermal-probed bootrr test as reported in the kernelci-results list [1]. A proper rework will take longer, so revert it for now. [1] https://groups.io/g/kernelci-results/message/42660 Fixes: f05c7b7d9ea9 ("thermal/drivers/mediatek: Use devm_of_iomap to avoid resource leak in mtk_thermal_probe") Signed-off-by: Ricardo CaƱuelo Suggested-by: AngeloGioacchino Del Regno Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230525121811.3360268-1-ricardo.canuelo@collabora.com Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/mtk_thermal.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 9fe169dbed88..0bd7aa564bc2 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -1026,12 +1026,7 @@ static int mtk_thermal_probe(struct platform_device *pdev) return -ENODEV; } - auxadc_base = devm_of_iomap(&pdev->dev, auxadc, 0, NULL); - if (IS_ERR(auxadc_base)) { - of_node_put(auxadc); - return PTR_ERR(auxadc_base); - } - + auxadc_base = of_iomap(auxadc, 0); auxadc_phys_base = of_get_phys_base(auxadc); of_node_put(auxadc); @@ -1047,12 +1042,7 @@ static int mtk_thermal_probe(struct platform_device *pdev) return -ENODEV; } - apmixed_base = devm_of_iomap(&pdev->dev, apmixedsys, 0, NULL); - if (IS_ERR(apmixed_base)) { - of_node_put(apmixedsys); - return PTR_ERR(apmixed_base); - } - + apmixed_base = of_iomap(apmixedsys, 0); apmixed_phys_base = of_get_phys_base(apmixedsys); of_node_put(apmixedsys); From c484b65f93e0fefadd4b0b3bdeff9a2873838819 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 15 May 2023 19:32:16 +0200 Subject: [PATCH 009/509] scripts/tags.sh: Resolve gtags empty index generation commit e1b37563caffc410bb4b55f153ccb14dede66815 upstream. gtags considers any file outside of its current working directory "outside the source tree" and refuses to index it. For O= kernel builds, or when "make" is invoked from a directory other then the kernel source tree, gtags ignores the entire kernel source and generates an empty index. Force-set gtags current working directory to the kernel source tree. Due to commit 9da0763bdd82 ("kbuild: Use relative path when building in a subdir of the source tree"), if the kernel build is done in a sub-directory of the kernel source tree, the kernel Makefile will set the kernel's $srctree to ".." for shorter compile-time and run-time warnings. Consequently, the list of files to be indexed will be in the "../*" form, rendering all such paths invalid once gtags switches to the kernel source tree as its current working directory. If gtags indexing is requested and the build directory is not the kernel source tree, index all files in absolute-path form. Note, indexing in absolute-path form will not affect the generated index, as paths in gtags indices are always relative to the gtags "root directory" anyway (as evidenced by "gtags --dump"). Signed-off-by: Ahmed S. Darwish Cc: Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- scripts/tags.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index b82aebb0c995..d17801c9d85a 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -32,6 +32,13 @@ else ignore="$ignore ( -path ${tree}tools ) -prune -o" fi +# gtags(1) refuses to index any file outside of its current working dir. +# If gtags indexing is requested and the build output directory is not +# the kernel source tree, index all files in absolute-path form. +if [[ "$1" == "gtags" && -n "${tree}" ]]; then + tree=$(realpath "$tree")/ +fi + # Detect if ALLSOURCE_ARCHS is set. If not, we assume SRCARCH if [ "${ALLSOURCE_ARCHS}" = "" ]; then ALLSOURCE_ARCHS=${SRCARCH} @@ -131,7 +138,7 @@ docscope() dogtags() { - all_target_sources | gtags -i -f - + all_target_sources | gtags -i -C "${tree:-.}" -f - "$PWD" } # Basic regular expressions with an optional /kind-spec/ for ctags and From 0336c8f072237e6a0fdacdde3de7f02abd1bc0b3 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sat, 13 May 2023 14:51:00 +0200 Subject: [PATCH 010/509] drm/amdgpu: Validate VM ioctl flags. commit a2b308044dcaca8d3e580959a4f867a1d5c37fac upstream. None have been defined yet, so reject anybody setting any. Mesa sets it to 0 anyway. Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 45b1f00c5968..c705ce11c436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3252,6 +3252,10 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) long timeout = msecs_to_jiffies(2000); int r; + /* No valid flags defined yet */ + if (args->in.flags) + return -EINVAL; + switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ From f70407e8e0272e00d133c5e039168ff1bae6bcac Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Tue, 14 Mar 2023 19:51:59 +1100 Subject: [PATCH 011/509] nubus: Partially revert proc_create_single_data() conversion commit 0e96647cff9224db564a1cee6efccb13dbe11ee2 upstream. The conversion to proc_create_single_data() introduced a regression whereby reading a file in /proc/bus/nubus results in a seg fault: # grep -r . /proc/bus/nubus/e/ Data read fault at 0x00000020 in Super Data (pc=0x1074c2) BAD KERNEL BUSERR Oops: 00000000 Modules linked in: PC: [<001074c2>] PDE_DATA+0xc/0x16 SR: 2010 SP: 38284958 a2: 01152370 d0: 00000001 d1: 01013000 d2: 01002790 d3: 00000000 d4: 00000001 d5: 0008ce2e a0: 00000000 a1: 00222a40 Process grep (pid: 45, task=142f8727) Frame format=B ssw=074d isc=2008 isb=4e5e daddr=00000020 dobuf=01199e70 baddr=001074c8 dibuf=ffffffff ver=f Stack from 01199e48: 01199e70 00222a58 01002790 00000000 011a3000 01199eb0 015000c0 00000000 00000000 01199ec0 01199ec0 000d551a 011a3000 00000001 00000000 00018000 d003f000 00000003 00000001 0002800d 01052840 01199fa8 c01f8000 00000000 00000029 0b532b80 00000000 00000000 00000029 0b532b80 01199ee4 00103640 011198c0 d003f000 00018000 01199fa8 00000000 011198c0 00000000 01199f4c 000b3344 011198c0 d003f000 00018000 01199fa8 00000000 00018000 011198c0 Call Trace: [<00222a58>] nubus_proc_rsrc_show+0x18/0xa0 [<000d551a>] seq_read+0xc4/0x510 [<00018000>] fp_fcos+0x2/0x82 [<0002800d>] __sys_setreuid+0x115/0x1c6 [<00103640>] proc_reg_read+0x5c/0xb0 [<00018000>] fp_fcos+0x2/0x82 [<000b3344>] __vfs_read+0x2c/0x13c [<00018000>] fp_fcos+0x2/0x82 [<00018000>] fp_fcos+0x2/0x82 [<000b8aa2>] sys_statx+0x60/0x7e [<000b34b6>] vfs_read+0x62/0x12a [<00018000>] fp_fcos+0x2/0x82 [<00018000>] fp_fcos+0x2/0x82 [<000b39c2>] ksys_read+0x48/0xbe [<00018000>] fp_fcos+0x2/0x82 [<000b3a4e>] sys_read+0x16/0x1a [<00018000>] fp_fcos+0x2/0x82 [<00002b84>] syscall+0x8/0xc [<00018000>] fp_fcos+0x2/0x82 [<0000c016>] not_ext+0xa/0x18 Code: 4e5e 4e75 4e56 0000 206e 0008 2068 ffe8 <2068> 0020 2008 4e5e 4e75 4e56 0000 2f0b 206e 0008 2068 0004 2668 0020 206b ffe8 Disabling lock debugging due to kernel taint Segmentation fault The proc_create_single_data() conversion does not work because single_open(file, nubus_proc_rsrc_show, PDE_DATA(inode)) is not equivalent to the original code. Fixes: 3f3942aca6da ("proc: introduce proc_create_single{,_data}") Cc: Christoph Hellwig Cc: stable@vger.kernel.org # 5.6+ Signed-off-by: Finn Thain Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/d4e2a586e793cc8d9442595684ab8a077c0fe726.1678783919.git.fthain@linux-m68k.org Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/nubus/proc.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 88e1f9a0faaf..78cf0e7b53d5 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -137,6 +137,18 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v) return 0; } +static int nubus_rsrc_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, nubus_proc_rsrc_show, inode); +} + +static const struct proc_ops nubus_rsrc_proc_ops = { + .proc_open = nubus_rsrc_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, const struct nubus_dirent *ent, unsigned int size) @@ -152,8 +164,8 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size); else pde_data = NULL; - proc_create_single_data(name, S_IFREG | 0444, procdir, - nubus_proc_rsrc_show, pde_data); + proc_create_data(name, S_IFREG | 0444, procdir, + &nubus_rsrc_proc_ops, pde_data); } void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, @@ -166,9 +178,9 @@ void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, return; snprintf(name, sizeof(name), "%x", ent->type); - proc_create_single_data(name, S_IFREG | 0444, procdir, - nubus_proc_rsrc_show, - nubus_proc_alloc_pde_data(data, 0)); + proc_create_data(name, S_IFREG | 0444, procdir, + &nubus_rsrc_proc_ops, + nubus_proc_alloc_pde_data(data, 0)); } /* From 1a82005f3f636e31e370f9d878ef2ca6e61b839f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 21:56:12 +0200 Subject: [PATCH 012/509] fs: pipe: reveal missing function protoypes [ Upstream commit 247c8d2f9837a3e29e3b6b7a4aa9c36c37659dd4 ] A couple of functions from fs/pipe.c are used both internally and for the watch queue code, but the declaration is only visible when the latter is enabled: fs/pipe.c:1254:5: error: no previous prototype for 'pipe_resize_ring' fs/pipe.c:758:15: error: no previous prototype for 'account_pipe_buffers' fs/pipe.c:764:6: error: no previous prototype for 'too_many_pipe_buffers_soft' fs/pipe.c:771:6: error: no previous prototype for 'too_many_pipe_buffers_hard' fs/pipe.c:777:6: error: no previous prototype for 'pipe_is_unprivileged_user' Make the visible unconditionally to avoid these warnings. Fixes: c73be61cede5 ("pipe: Add general notification queue support") Signed-off-by: Arnd Bergmann Message-Id: <20230516195629.551602-1-arnd@kernel.org> Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- include/linux/pipe_fs_i.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index c0b6ec6bf65b..ef236dbaa294 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -256,18 +256,14 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; -#ifdef CONFIG_WATCH_QUEUE unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); -#endif /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ -#ifdef CONFIG_WATCH_QUEUE int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); -#endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); From 3db97cc79b828d99a2dc9298050733c12a8776b3 Mon Sep 17 00:00:00 2001 From: Shawn Wang Date: Mon, 15 May 2023 14:04:48 +0800 Subject: [PATCH 013/509] x86/resctrl: Only show tasks' pid in current pid namespace [ Upstream commit 2997d94b5dd0e8b10076f5e0b6f18410c73e28bd ] When writing a task id to the "tasks" file in an rdtgroup, rdtgroup_tasks_write() treats the pid as a number in the current pid namespace. But when reading the "tasks" file, rdtgroup_tasks_show() shows the list of global pids from the init namespace, which is confusing and incorrect. To be more robust, let the "tasks" file only show pids in the current pid namespace. Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Signed-off-by: Shawn Wang Signed-off-by: Borislav Petkov (AMD) Acked-by: Reinette Chatre Acked-by: Fenghua Yu Tested-by: Reinette Chatre Link: https://lore.kernel.org/all/20230116071246.97717-1-shawnwang@linux.alibaba.com/ Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1a943743cfe4..1e73b6fae3b4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -715,11 +715,15 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s) { struct task_struct *p, *t; + pid_t pid; rcu_read_lock(); for_each_process_thread(p, t) { - if (is_closid_match(t, r) || is_rmid_match(t, r)) - seq_printf(s, "%d\n", t->pid); + if (is_closid_match(t, r) || is_rmid_match(t, r)) { + pid = task_pid_vnr(t); + if (pid) + seq_printf(s, "%d\n", pid); + } } rcu_read_unlock(); } From 8563b58a4360e648ce18f0e98a75a4be51667431 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Sat, 27 May 2023 17:19:04 +0800 Subject: [PATCH 014/509] blk-iocost: use spin_lock_irqsave in adjust_inuse_and_calc_cost [ Upstream commit 8d211554679d0b23702bd32ba04aeac0c1c4f660 ] adjust_inuse_and_calc_cost() use spin_lock_irq() and IRQ will be enabled when unlock. DEADLOCK might happen if we have held other locks and disabled IRQ before invoking it. Fix it by using spin_lock_irqsave() instead, which can keep IRQ state consistent with before when unlock. ================================ WARNING: inconsistent lock state 5.10.0-02758-g8e5f91fd772f #26 Not tainted -------------------------------- inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. kworker/2:3/388 [HC0[0]:SC0[0]:HE0:SE1] takes: ffff888118c00c28 (&bfqd->lock){?.-.}-{2:2}, at: spin_lock_irq ffff888118c00c28 (&bfqd->lock){?.-.}-{2:2}, at: bfq_bio_merge+0x141/0x390 {IN-HARDIRQ-W} state was registered at: __lock_acquire+0x3d7/0x1070 lock_acquire+0x197/0x4a0 __raw_spin_lock_irqsave _raw_spin_lock_irqsave+0x3b/0x60 bfq_idle_slice_timer_body bfq_idle_slice_timer+0x53/0x1d0 __run_hrtimer+0x477/0xa70 __hrtimer_run_queues+0x1c6/0x2d0 hrtimer_interrupt+0x302/0x9e0 local_apic_timer_interrupt __sysvec_apic_timer_interrupt+0xfd/0x420 run_sysvec_on_irqstack_cond sysvec_apic_timer_interrupt+0x46/0xa0 asm_sysvec_apic_timer_interrupt+0x12/0x20 irq event stamp: 837522 hardirqs last enabled at (837521): [] __raw_spin_unlock_irqrestore hardirqs last enabled at (837521): [] _raw_spin_unlock_irqrestore+0x3d/0x40 hardirqs last disabled at (837522): [] __raw_spin_lock_irq hardirqs last disabled at (837522): [] _raw_spin_lock_irq+0x43/0x50 softirqs last enabled at (835852): [] __do_softirq+0x558/0x8ec softirqs last disabled at (835845): [] asm_call_irq_on_stack+0xf/0x20 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&bfqd->lock); lock(&bfqd->lock); *** DEADLOCK *** 3 locks held by kworker/2:3/388: #0: ffff888107af0f38 ((wq_completion)kthrotld){+.+.}-{0:0}, at: process_one_work+0x742/0x13f0 #1: ffff8881176bfdd8 ((work_completion)(&td->dispatch_work)){+.+.}-{0:0}, at: process_one_work+0x777/0x13f0 #2: ffff888118c00c28 (&bfqd->lock){?.-.}-{2:2}, at: spin_lock_irq #2: ffff888118c00c28 (&bfqd->lock){?.-.}-{2:2}, at: bfq_bio_merge+0x141/0x390 stack backtrace: CPU: 2 PID: 388 Comm: kworker/2:3 Not tainted 5.10.0-02758-g8e5f91fd772f #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Workqueue: kthrotld blk_throtl_dispatch_work_fn Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x107/0x167 print_usage_bug valid_state mark_lock_irq.cold+0x32/0x3a mark_lock+0x693/0xbc0 mark_held_locks+0x9e/0xe0 __trace_hardirqs_on_caller lockdep_hardirqs_on_prepare.part.0+0x151/0x360 trace_hardirqs_on+0x5b/0x180 __raw_spin_unlock_irq _raw_spin_unlock_irq+0x24/0x40 spin_unlock_irq adjust_inuse_and_calc_cost+0x4fb/0x970 ioc_rqos_merge+0x277/0x740 __rq_qos_merge+0x62/0xb0 rq_qos_merge bio_attempt_back_merge+0x12c/0x4a0 blk_mq_sched_try_merge+0x1b6/0x4d0 bfq_bio_merge+0x24a/0x390 __blk_mq_sched_bio_merge+0xa6/0x460 blk_mq_sched_bio_merge blk_mq_submit_bio+0x2e7/0x1ee0 __submit_bio_noacct_mq+0x175/0x3b0 submit_bio_noacct+0x1fb/0x270 blk_throtl_dispatch_work_fn+0x1ef/0x2b0 process_one_work+0x83e/0x13f0 process_scheduled_works worker_thread+0x7e3/0xd80 kthread+0x353/0x470 ret_from_fork+0x1f/0x30 Fixes: b0853ab4a238 ("blk-iocost: revamp in-period donation snapbacks") Signed-off-by: Li Nan Acked-by: Tejun Heo Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20230527091904.3001833-1-linan666@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iocost.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 105ad23dff06..7ba7c4e4e4c9 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2426,6 +2426,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, u32 hwi, adj_step; s64 margin; u64 cost, new_inuse; + unsigned long flags; current_hweight(iocg, NULL, &hwi); old_hwi = hwi; @@ -2444,11 +2445,11 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, iocg->inuse == iocg->active) return cost; - spin_lock_irq(&ioc->lock); + spin_lock_irqsave(&ioc->lock, flags); /* we own inuse only when @iocg is in the normal active state */ if (iocg->abs_vdebt || list_empty(&iocg->active_list)) { - spin_unlock_irq(&ioc->lock); + spin_unlock_irqrestore(&ioc->lock, flags); return cost; } @@ -2469,7 +2470,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, } while (time_after64(vtime + cost, now->vnow) && iocg->inuse != iocg->active); - spin_unlock_irq(&ioc->lock); + spin_unlock_irqrestore(&ioc->lock, flags); TRACE_IOCG_PATH(inuse_adjust, iocg, now, old_inuse, iocg->inuse, old_hwi, hwi); From 39fa14e824acfd470db4f42c354297456bd82b53 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 15 May 2023 21:48:05 +0800 Subject: [PATCH 015/509] md/raid10: check slab-out-of-bounds in md_bitmap_get_counter [ Upstream commit 301867b1c16805aebbc306aafa6ecdc68b73c7e5 ] If we write a large number to md/bitmap_set_bits, md_bitmap_checkpage() will return -EINVAL because 'page >= bitmap->pages', but the return value was not checked immediately in md_bitmap_get_counter() in order to set *blocks value and slab-out-of-bounds occurs. Move check of 'page >= bitmap->pages' to md_bitmap_get_counter() and return directly if true. Fixes: ef4256733506 ("md/bitmap: optimise scanning of empty bitmaps.") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230515134808.3936750-2-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md-bitmap.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 20afc0aec177..f843ade442de 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -54,14 +54,7 @@ __acquires(bitmap->lock) { unsigned char *mappage; - if (page >= bitmap->pages) { - /* This can happen if bitmap_start_sync goes beyond - * End-of-device while looking for a whole page. - * It is harmless. - */ - return -EINVAL; - } - + WARN_ON_ONCE(page >= bitmap->pages); if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ return 0; @@ -1365,6 +1358,14 @@ __acquires(bitmap->lock) sector_t csize; int err; + if (page >= bitmap->pages) { + /* + * This can happen if bitmap_start_sync goes beyond + * End-of-device while looking for a whole page or + * user set a huge number to sysfs bitmap_set_bits. + */ + return NULL; + } err = md_bitmap_checkpage(bitmap, page, create, 0); if (bitmap->bp[page].hijacked || From b3a0bc4a01fa8dc308bf04df7b76358c31fb0651 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 22 May 2023 15:25:33 +0800 Subject: [PATCH 016/509] md/raid10: fix overflow of md/safe_mode_delay [ Upstream commit 6beb489b2eed25978523f379a605073f99240c50 ] There is no input check when echo md/safe_mode_delay in safe_delay_store(). And msec might also overflow when HZ < 1000 in safe_delay_show(), Fix it by checking overflow in safe_delay_store() and use unsigned long conversion in safe_delay_show(). Fixes: 72e02075a33f ("md: factor out parsing of fixed-point numbers") Signed-off-by: Li Nan Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230522072535.1523740-2-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1553c2495841..204838a6d443 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3890,8 +3890,9 @@ int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale) static ssize_t safe_delay_show(struct mddev *mddev, char *page) { - int msec = (mddev->safemode_delay*1000)/HZ; - return sprintf(page, "%d.%03d\n", msec/1000, msec%1000); + unsigned int msec = ((unsigned long)mddev->safemode_delay*1000)/HZ; + + return sprintf(page, "%u.%03u\n", msec/1000, msec%1000); } static ssize_t safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len) @@ -3903,7 +3904,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len) return -EINVAL; } - if (strict_strtoul_scaled(cbuf, &msec, 3) < 0) + if (strict_strtoul_scaled(cbuf, &msec, 3) < 0 || msec > UINT_MAX / HZ) return -EINVAL; if (msec == 0) mddev->safemode_delay = 0; From b1d8f38310bce3282374983b229d94edbaf1e570 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Mon, 22 May 2023 15:25:34 +0800 Subject: [PATCH 017/509] md/raid10: fix wrong setting of max_corr_read_errors [ Upstream commit f8b20a405428803bd9881881d8242c9d72c6b2b2 ] There is no input check when echo md/max_read_errors and overflow might occur. Add check of input number. Fixes: 1e50915fe0bb ("raid: improve MD/raid10 handling of correctable read errors.") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230522072535.1523740-3-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 204838a6d443..bbf39abc32b7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4574,6 +4574,8 @@ max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len rv = kstrtouint(buf, 10, &n); if (rv < 0) return rv; + if (n > INT_MAX) + return -EINVAL; atomic_set(&mddev->max_corr_read_errors, n); return len; } From 2990e2ece18dd4cca71b3109c80517ad94adb065 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Sat, 27 May 2023 15:22:15 +0800 Subject: [PATCH 018/509] md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request [ Upstream commit 34817a2441747b48e444cb0e05d84e14bc9443da ] There are two check of 'mreplace' in raid10_sync_request(). In the first check, 'need_replace' will be set and 'mreplace' will be used later if no-Faulty 'mreplace' exists, In the second check, 'mreplace' will be set to NULL if it is Faulty, but 'need_replace' will not be changed accordingly. null-ptr-deref occurs if Faulty is set between two check. Fix it by merging two checks into one. And replace 'need_replace' with 'mreplace' because their values are always the same. Fixes: ee37d7314a32 ("md/raid10: Fix raid10 replace hang when new added disk faulty") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230527072218.2365857-2-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6a0459f9fafb..01680029f0de 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3037,7 +3037,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, int must_sync; int any_working; int need_recover = 0; - int need_replace = 0; struct raid10_info *mirror = &conf->mirrors[i]; struct md_rdev *mrdev, *mreplace; @@ -3049,11 +3048,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, !test_bit(Faulty, &mrdev->flags) && !test_bit(In_sync, &mrdev->flags)) need_recover = 1; - if (mreplace != NULL && - !test_bit(Faulty, &mreplace->flags)) - need_replace = 1; + if (mreplace && test_bit(Faulty, &mreplace->flags)) + mreplace = NULL; - if (!need_recover && !need_replace) { + if (!need_recover && !mreplace) { rcu_read_unlock(); continue; } @@ -3069,8 +3067,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, rcu_read_unlock(); continue; } - if (mreplace && test_bit(Faulty, &mreplace->flags)) - mreplace = NULL; /* Unless we are doing a full sync, or a replacement * we only need to recover the block if it is set in * the bitmap @@ -3193,11 +3189,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[1].repl_bio; if (bio) bio->bi_end_io = NULL; - /* Note: if need_replace, then bio + /* Note: if replace is not NULL, then bio * cannot be NULL as r10buf_pool_alloc will * have allocated it. */ - if (!need_replace) + if (!mreplace) break; bio->bi_next = biolist; biolist = bio; From 9d1cccdad080a5fbb3f78baa9533fae9cf6b162a Mon Sep 17 00:00:00 2001 From: Li Nan Date: Fri, 2 Jun 2023 17:18:39 +0800 Subject: [PATCH 019/509] md/raid10: fix io loss while replacement replace rdev [ Upstream commit 2ae6aaf76912bae53c74b191569d2ab484f24bf3 ] When removing a disk with replacement, the replacement will be used to replace rdev. During this process, there is a brief window in which both rdev and replacement are read as NULL in raid10_write_request(). This will result in io not being submitted but it should be. //remove //write raid10_remove_disk raid10_write_request mirror->rdev = NULL read rdev -> NULL mirror->rdev = mirror->replacement mirror->replacement = NULL read replacement -> NULL Fix it by reading replacement first and rdev later, meanwhile, use smp_mb() to prevent memory reordering. Fixes: 475b0321a4df ("md/raid10: writes should get directed to replacement as well as original.") Signed-off-by: Li Nan Reviewed-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230602091839.743798-3-linan666@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 01680029f0de..32a917e5103a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -751,8 +751,16 @@ static struct md_rdev *read_balance(struct r10conf *conf, disk = r10_bio->devs[slot].devnum; rdev = rcu_dereference(conf->mirrors[disk].replacement); if (rdev == NULL || test_bit(Faulty, &rdev->flags) || - r10_bio->devs[slot].addr + sectors > rdev->recovery_offset) + r10_bio->devs[slot].addr + sectors > + rdev->recovery_offset) { + /* + * Read replacement first to prevent reading both rdev + * and replacement as NULL during replacement replace + * rdev. + */ + smp_mb(); rdev = rcu_dereference(conf->mirrors[disk].rdev); + } if (rdev == NULL || test_bit(Faulty, &rdev->flags)) continue; @@ -1346,9 +1354,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; - struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev); - struct md_rdev *rrdev = rcu_dereference( - conf->mirrors[d].replacement); + struct md_rdev *rdev, *rrdev; + + rrdev = rcu_dereference(conf->mirrors[d].replacement); + /* + * Read replacement first to prevent reading both rdev and + * replacement as NULL during replacement replace rdev. + */ + smp_mb(); + rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev == rrdev) rrdev = NULL; if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { From 495cee0e14177c718c22782b7b61e7dc23698c27 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Apr 2021 10:35:51 +0100 Subject: [PATCH 020/509] irqchip/jcore-aic: Kill use of irq_create_strict_mappings() [ Upstream commit 5f8b938bd790cff6542c7fe3c1495c71f89fef1b ] irq_create_strict_mappings() is a poor way to allow the use of a linear IRQ domain as a legacy one. Let's be upfront about it. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210406093557.1073423-4-maz@kernel.org Stable-dep-of: 4848229494a3 ("irqchip/jcore-aic: Fix missing allocation of IRQ descriptors") Signed-off-by: Sasha Levin --- drivers/irqchip/irq-jcore-aic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index 033bccb41455..5f47d8ee4ae3 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -100,11 +100,11 @@ static int __init aic_irq_of_init(struct device_node *node, jcore_aic.irq_unmask = noop; jcore_aic.name = "AIC"; - domain = irq_domain_add_linear(node, dom_sz, &jcore_aic_irqdomain_ops, + domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq, + &jcore_aic_irqdomain_ops, &jcore_aic); if (!domain) return -ENOMEM; - irq_create_strict_mappings(domain, min_irq, min_irq, dom_sz - min_irq); return 0; } From b315d57da4561acb08fa4cc9abbd690afd6bab51 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Wed, 10 May 2023 18:33:42 +0200 Subject: [PATCH 021/509] irqchip/jcore-aic: Fix missing allocation of IRQ descriptors [ Upstream commit 4848229494a323eeaab62eee5574ef9f7de80374 ] The initialization function for the J-Core AIC aic_irq_of_init() is currently missing the call to irq_alloc_descs() which allocates and initializes all the IRQ descriptors. Add missing function call and return the error code from irq_alloc_descs() in case the allocation fails. Fixes: 981b58f66cfc ("irqchip/jcore-aic: Add J-Core AIC driver") Signed-off-by: John Paul Adrian Glaubitz Tested-by: Rob Landley Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20230510163343.43090-1-glaubitz@physik.fu-berlin.de Signed-off-by: Sasha Levin --- drivers/irqchip/irq-jcore-aic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index 5f47d8ee4ae3..b9dcc8e78c75 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -68,6 +68,7 @@ static int __init aic_irq_of_init(struct device_node *node, unsigned min_irq = JCORE_AIC2_MIN_HWIRQ; unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1; struct irq_domain *domain; + int ret; pr_info("Initializing J-Core AIC\n"); @@ -100,6 +101,12 @@ static int __init aic_irq_of_init(struct device_node *node, jcore_aic.irq_unmask = noop; jcore_aic.name = "AIC"; + ret = irq_alloc_descs(-1, min_irq, dom_sz - min_irq, + of_node_to_nid(node)); + + if (ret < 0) + return ret; + domain = irq_domain_add_legacy(node, dom_sz - min_irq, min_irq, min_irq, &jcore_aic_irqdomain_ops, &jcore_aic); From f1be1ed32daa053484222f7f9beb2b16c624dffd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 22:16:34 +0200 Subject: [PATCH 022/509] posix-timers: Prevent RT livelock in itimer_delete() [ Upstream commit 9d9e522010eb5685d8b53e8a24320653d9d4cbbf ] itimer_delete() has a retry loop when the timer is concurrently expired. On non-RT kernels this just spin-waits until the timer callback has completed, except for posix CPU timers which have HAVE_POSIX_CPU_TIMERS_TASK_WORK enabled. In that case and on RT kernels the existing task could live lock when preempting the task which does the timer delivery. Replace spin_unlock() with an invocation of timer_wait_running() to handle it the same way as the other retry loops in the posix timer code. Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT") Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87v8g7c50d.ffs@tglx Signed-off-by: Sasha Levin --- kernel/time/posix-timers.c | 43 +++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index d089627f2f2b..6d12a724d2b6 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -1037,27 +1037,52 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) } /* - * return timer owned by the process, used by exit_itimers + * Delete a timer if it is armed, remove it from the hash and schedule it + * for RCU freeing. */ static void itimer_delete(struct k_itimer *timer) { -retry_delete: - spin_lock_irq(&timer->it_lock); + unsigned long flags; + /* + * irqsave is required to make timer_wait_running() work. + */ + spin_lock_irqsave(&timer->it_lock, flags); + +retry_delete: + /* + * Even if the timer is not longer accessible from other tasks + * it still might be armed and queued in the underlying timer + * mechanism. Worse, that timer mechanism might run the expiry + * function concurrently. + */ if (timer_delete_hook(timer) == TIMER_RETRY) { - spin_unlock_irq(&timer->it_lock); + /* + * Timer is expired concurrently, prevent livelocks + * and pointless spinning on RT. + * + * timer_wait_running() drops timer::it_lock, which opens + * the possibility for another task to delete the timer. + * + * That's not possible here because this is invoked from + * do_exit() only for the last thread of the thread group. + * So no other task can access and delete that timer. + */ + if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) + return; + goto retry_delete; } list_del(&timer->list); - spin_unlock_irq(&timer->it_lock); + spin_unlock_irqrestore(&timer->it_lock, flags); release_posix_timer(timer, IT_ID_SET); } /* - * This is called by do_exit or de_thread, only when nobody else can - * modify the signal->posix_timers list. Yet we need sighand->siglock - * to prevent the race with /proc/pid/timers. + * Invoked from do_exit() when the last thread of a thread group exits. + * At that point no other task can access the timers of the dying + * task anymore. */ void exit_itimers(struct task_struct *tsk) { @@ -1067,10 +1092,12 @@ void exit_itimers(struct task_struct *tsk) if (list_empty(&tsk->signal->posix_timers)) return; + /* Protect against concurrent read via /proc/$PID/timers */ spin_lock_irq(&tsk->sighand->siglock); list_replace_init(&tsk->signal->posix_timers, &timers); spin_unlock_irq(&tsk->sighand->siglock); + /* The timers are not longer accessible via tsk::signal */ while (!list_empty(&timers)) { tmr = list_first_entry(&timers, struct k_itimer, list); itimer_delete(tmr); From a2f83a4c7cb503aaacedc4199b1b1bc916bb996d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 18 Apr 2023 16:38:54 +0200 Subject: [PATCH 023/509] tracing/timer: Add missing hrtimer modes to decode_hrtimer_mode(). [ Upstream commit 2951580ba6adb082bb6b7154a5ecb24e7c1f7569 ] The trace output for the HRTIMER_MODE_.*_HARD modes is seen as a number since these modes are not decoded. The author was not aware of the fancy decoding function which makes the life easier. Extend decode_hrtimer_mode() with the additional HRTIMER_MODE_.*_HARD modes. Fixes: ae6683d815895 ("hrtimer: Introduce HARD expiry mode") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230418143854.8vHWQKLM@linutronix.de Signed-off-by: Sasha Levin --- include/trace/events/timer.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 40e9b5a12732..5c540ccabcac 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -156,7 +156,11 @@ DEFINE_EVENT(timer_class, timer_cancel, { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ - { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ + { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ + { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ + { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ + { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** * hrtimer_init - called when the hrtimer is initialized From ebdff0986513a29be242aace0ef89b6c105b0bf0 Mon Sep 17 00:00:00 2001 From: Feng Mingxi Date: Tue, 25 Apr 2023 06:56:11 +0000 Subject: [PATCH 024/509] clocksource/drivers/cadence-ttc: Fix memory leak in ttc_timer_probe [ Upstream commit 8b5bf64c89c7100c921bd807ba39b2eb003061ab ] Smatch reports: drivers/clocksource/timer-cadence-ttc.c:529 ttc_timer_probe() warn: 'timer_baseaddr' from of_iomap() not released on lines: 498,508,516. timer_baseaddr may have the problem of not being released after use, I replaced it with the devm_of_iomap() function and added the clk_put() function to cleanup the "clk_ce" and "clk_cs". Fixes: e932900a3279 ("arm: zynq: Use standard timer binding") Fixes: 70504f311d4b ("clocksource/drivers/cadence_ttc: Convert init function to return error") Signed-off-by: Feng Mingxi Reviewed-by: Dongliang Mu Acked-by: Michal Simek Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20230425065611.702917-1-m202271825@hust.edu.cn Signed-off-by: Sasha Levin --- drivers/clocksource/timer-cadence-ttc.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/timer-cadence-ttc.c b/drivers/clocksource/timer-cadence-ttc.c index 4efd0cf3b602..0d52e28fea4d 100644 --- a/drivers/clocksource/timer-cadence-ttc.c +++ b/drivers/clocksource/timer-cadence-ttc.c @@ -486,10 +486,10 @@ static int __init ttc_timer_probe(struct platform_device *pdev) * and use it. Note that the event timer uses the interrupt and it's the * 2nd TTC hence the irq_of_parse_and_map(,1) */ - timer_baseaddr = of_iomap(timer, 0); - if (!timer_baseaddr) { + timer_baseaddr = devm_of_iomap(&pdev->dev, timer, 0, NULL); + if (IS_ERR(timer_baseaddr)) { pr_err("ERROR: invalid timer base address\n"); - return -ENXIO; + return PTR_ERR(timer_baseaddr); } irq = irq_of_parse_and_map(timer, 1); @@ -513,20 +513,27 @@ static int __init ttc_timer_probe(struct platform_device *pdev) clk_ce = of_clk_get(timer, clksel); if (IS_ERR(clk_ce)) { pr_err("ERROR: timer input clock not found\n"); - return PTR_ERR(clk_ce); + ret = PTR_ERR(clk_ce); + goto put_clk_cs; } ret = ttc_setup_clocksource(clk_cs, timer_baseaddr, timer_width); if (ret) - return ret; + goto put_clk_ce; ret = ttc_setup_clockevent(clk_ce, timer_baseaddr + 4, irq); if (ret) - return ret; + goto put_clk_ce; pr_info("%pOFn #0 at %p, irq=%d\n", timer, timer_baseaddr, irq); return 0; + +put_clk_ce: + clk_put(clk_ce); +put_clk_cs: + clk_put(clk_cs); + return ret; } static const struct of_device_id ttc_timer_of_match[] = { From b69868d50df43f844b1b309d04ae7fcd16207b9d Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Tue, 18 Apr 2023 06:07:43 -0700 Subject: [PATCH 025/509] PM: domains: fix integer overflow issues in genpd_parse_state() [ Upstream commit e5d1c8722083f0332dcd3c85fa1273d85fb6bed8 ] Currently, while calculating residency and latency values, right operands may overflow if resulting values are big enough. To prevent this, albeit unlikely case, play it safe and convert right operands to left ones' type s64. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 30f604283e05 ("PM / Domains: Allow domain power states to be read from DT") Signed-off-by: Nikita Zhandarovich Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/base/power/domain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index d0ba5459ce0b..8a90f08c9682 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2763,10 +2763,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, err = of_property_read_u32(state_node, "min-residency-us", &residency); if (!err) - genpd_state->residency_ns = 1000 * residency; + genpd_state->residency_ns = 1000LL * residency; - genpd_state->power_on_latency_ns = 1000 * exit_latency; - genpd_state->power_off_latency_ns = 1000 * entry_latency; + genpd_state->power_on_latency_ns = 1000LL * exit_latency; + genpd_state->power_off_latency_ns = 1000LL * entry_latency; genpd_state->fwnode = &state_node->fwnode; return 0; From 23f6efd226448b5bef1db3eeec88a6d6085b3519 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 24 May 2023 17:44:32 +0100 Subject: [PATCH 026/509] perf/arm-cmn: Fix DTC reset [ Upstream commit 71746c995cac92fcf6a65661b51211cf2009d7f0 ] It turns out that my naive DTC reset logic fails to work as intended, since, after checking with the hardware designers, the PMU actually needs to be fully enabled in order to correctly clear any pending overflows. Therefore, invert the sequence to start with turning on both enables so that we can reliably get the DTCs into a known state, then moving to our normal counters-stopped state from there. Since all the DTM counters have already been unpaired during the initial discovery pass, we just need to additionally reset the cycle counters to ensure that no other unexpected overflows occur during this period. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Reported-by: Geoff Blake Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0ea4559261ea394f827c9aee5168c77a60aaee03.1684946389.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/perf/arm-cmn.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index bb019e383988..36061aaf026c 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1254,9 +1254,10 @@ static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int id if (dtc->irq < 0) return dtc->irq; - writel_relaxed(0, dtc->base + CMN_DT_PMCR); + writel_relaxed(CMN_DT_DTC_CTL_DT_EN, dtc->base + CMN_DT_DTC_CTL); + writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); + writeq_relaxed(0, dtc->base + CMN_DT_PMCCNTR); writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR); - writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR); /* We do at least know that a DTC's XP must be in that DTC's domain */ xp = arm_cmn_node_to_xp(dn); @@ -1303,7 +1304,7 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn) dn->type = CMN_TYPE_RNI; } - writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL); + arm_cmn_set_state(cmn, CMN_STATE_DISABLED); return 0; } From a50b75c13d37cfa34af6998fa67c5b35fd2c739f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 6 Jun 2023 22:00:00 +0800 Subject: [PATCH 027/509] powercap: RAPL: Fix CONFIG_IOSF_MBI dependency [ Upstream commit 4658fe81b3f8afe8adf37734ec5fe595d90415c6 ] After commit 3382388d7148 ("intel_rapl: abstract RAPL common code"), accessing to IOSF_MBI interface is done in the RAPL common code. Thus it is the CONFIG_INTEL_RAPL_CORE that has dependency of CONFIG_IOSF_MBI, while CONFIG_INTEL_RAPL_MSR does not. This problem was not exposed previously because all the previous RAPL common code users, aka, the RAPL MSR and MMIO I/F drivers, have CONFIG_IOSF_MBI selected. Fix the CONFIG_IOSF_MBI dependency in RAPL code. This also fixes a build time failure when the RAPL TPMI I/F driver is introduced without selecting CONFIG_IOSF_MBI. x86_64-linux-ld: vmlinux.o: in function `set_floor_freq_atom': intel_rapl_common.c:(.text+0x2dac9b8): undefined reference to `iosf_mbi_write' x86_64-linux-ld: intel_rapl_common.c:(.text+0x2daca66): undefined reference to `iosf_mbi_read' Reference to iosf_mbi.h is also removed from the RAPL MSR I/F driver. Fixes: 3382388d7148 ("intel_rapl: abstract RAPL common code") Reported-by: Arnd Bergmann Link: https://lore.kernel.org/all/20230601213246.3271412-1-arnd@kernel.org Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/powercap/Kconfig | 4 +++- drivers/powercap/intel_rapl_msr.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index bc228725346b..0e4b2c214a70 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -18,10 +18,12 @@ if POWERCAP # Client driver configurations go here. config INTEL_RAPL_CORE tristate + depends on PCI + select IOSF_MBI config INTEL_RAPL tristate "Intel RAPL Support via MSR Interface" - depends on X86 && IOSF_MBI + depends on X86 && PCI select INTEL_RAPL_CORE help This enables support for the Intel Running Average Power Limit (RAPL) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 1646808d354c..6b68e5ed2081 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -22,7 +22,6 @@ #include #include -#include #include #include From a8bfe527556b6f7fc29cbfd2d176dab960075b2f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Jun 2023 19:28:42 +0100 Subject: [PATCH 028/509] ARM: 9303/1: kprobes: avoid missing-declaration warnings [ Upstream commit 1b9c3ddcec6a55e15d3e38e7405e2d078db02020 ] checker_stack_use_t32strd() and kprobe_handler() can be made static since they are not used from other files, while coverage_start_registers() and __kprobes_test_case() are used from assembler code, and just need a declaration to avoid a warning with the global definition. arch/arm/probes/kprobes/checkers-common.c:43:18: error: no previous prototype for 'checker_stack_use_t32strd' arch/arm/probes/kprobes/core.c:236:16: error: no previous prototype for 'kprobe_handler' arch/arm/probes/kprobes/test-core.c:723:10: error: no previous prototype for 'coverage_start_registers' arch/arm/probes/kprobes/test-core.c:918:14: error: no previous prototype for '__kprobes_test_case_start' arch/arm/probes/kprobes/test-core.c:952:14: error: no previous prototype for '__kprobes_test_case_end_16' arch/arm/probes/kprobes/test-core.c:967:14: error: no previous prototype for '__kprobes_test_case_end_32' Fixes: 6624cf651f1a ("ARM: kprobes: collects stack consumption for store instructions") Fixes: 454f3e132d05 ("ARM/kprobes: Remove jprobe arm implementation") Acked-by: Masami Hiramatsu (Google) Reviewed-by: Kees Cook Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) Signed-off-by: Sasha Levin --- arch/arm/probes/kprobes/checkers-common.c | 2 +- arch/arm/probes/kprobes/core.c | 2 +- arch/arm/probes/kprobes/opt-arm.c | 2 -- arch/arm/probes/kprobes/test-core.c | 2 +- arch/arm/probes/kprobes/test-core.h | 4 ++++ 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c index 4d720990cf2a..eba7ac4725c0 100644 --- a/arch/arm/probes/kprobes/checkers-common.c +++ b/arch/arm/probes/kprobes/checkers-common.c @@ -40,7 +40,7 @@ enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn, * Different from other insn uses imm8, the real addressing offset of * STRD in T32 encoding should be imm8 * 4. See ARMARM description. */ -enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn, +static enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn, struct arch_probes_insn *asi, const struct decode_header *h) { diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index e513d8a46776..c0ed17289378 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -231,7 +231,7 @@ singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) * kprobe, and that level is reserved for user kprobe handlers, so we can't * risk encountering a new kprobe in an interrupt handler. */ -void __kprobes kprobe_handler(struct pt_regs *regs) +static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur; struct kprobe_ctlblk *kcb; diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index c78180172120..e20304f1d8bc 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -145,8 +145,6 @@ __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) } } -extern void kprobe_handler(struct pt_regs *regs); - static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index c562832b8627..171c7076b89f 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -720,7 +720,7 @@ static const char coverage_register_lookup[16] = { [REG_TYPE_NOSPPCX] = COVERAGE_ANY_REG | COVERAGE_SP, }; -unsigned coverage_start_registers(const struct decode_header *h) +static unsigned coverage_start_registers(const struct decode_header *h) { unsigned regs = 0; int i; diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h index 19a5b2add41e..805116c2ec27 100644 --- a/arch/arm/probes/kprobes/test-core.h +++ b/arch/arm/probes/kprobes/test-core.h @@ -453,3 +453,7 @@ void kprobe_thumb32_test_cases(void); #else void kprobe_arm_test_cases(void); #endif + +void __kprobes_test_case_start(void); +void __kprobes_test_case_end_16(void); +void __kprobes_test_case_end_32(void); From bacc49b2d5619bdc240ceed625416e0e15b2cbed Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 21 Jun 2023 09:58:39 +0300 Subject: [PATCH 029/509] cpufreq: intel_pstate: Fix energy_performance_preference for passive [ Upstream commit 03f44ffb3d5be2fceda375d92c70ab6de4df7081 ] If the intel_pstate driver is set to passive mode, then writing the same value to the energy_performance_preference sysfs twice will fail. This is caused by the wrong return value used (index of the matched energy_perf_string), instead of the length of the passed in parameter. Fix by forcing the internal return value to zero when the same preference is passed in by user. This same issue is not present when active mode is used for the driver. Fixes: f6ebbcf08f37 ("cpufreq: intel_pstate: Implement passive mode with HWP enabled") Reported-by: Niklas Neronin Signed-off-by: Tero Kristo Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/cpufreq/intel_pstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1686705bee7b..4b06b81d8bb0 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -777,6 +777,8 @@ static ssize_t store_energy_performance_preference( err = cpufreq_start_governor(policy); if (!ret) ret = err; + } else { + ret = 0; } } From 456f783b83f87a645de7118ae0c1867ac67f584e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 May 2023 20:46:05 +0200 Subject: [PATCH 030/509] thermal/drivers/sun8i: Fix some error handling paths in sun8i_ths_probe() [ Upstream commit 89382022b370dfd34eaae9c863baa123fcd4d132 ] Should an error occur after calling sun8i_ths_resource_init() in the probe function, some resources need to be released, as already done in the .remove() function. Switch to the devm_clk_get_enabled() helper and add a new devm_action to turn sun8i_ths_resource_init() into a fully managed function. Move the place where reset_control_deassert() is called so that the recommended order of reset release/clock enable steps is kept. A64 manual states that: 3.3.6.4. Gating and reset Make sure that the reset signal has been released before the release of module clock gating; This fixes the issue and removes some LoC at the same time. Fixes: dccc5c3b6f30 ("thermal/drivers/sun8i: Add thermal driver for H6/H5/H3/A64/A83T/R40") Signed-off-by: Christophe JAILLET Acked-by: Maxime Ripard Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/a8ae84bd2dc4b55fe428f8e20f31438bf8bb6762.1684089931.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/thermal/sun8i_thermal.c | 55 +++++++++++---------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c index f8b13071a6f4..e053b0628017 100644 --- a/drivers/thermal/sun8i_thermal.c +++ b/drivers/thermal/sun8i_thermal.c @@ -318,6 +318,11 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev) return ret; } +static void sun8i_ths_reset_control_assert(void *data) +{ + reset_control_assert(data); +} + static int sun8i_ths_resource_init(struct ths_device *tmdev) { struct device *dev = tmdev->dev; @@ -338,47 +343,35 @@ static int sun8i_ths_resource_init(struct ths_device *tmdev) if (IS_ERR(tmdev->reset)) return PTR_ERR(tmdev->reset); - tmdev->bus_clk = devm_clk_get(&pdev->dev, "bus"); + ret = reset_control_deassert(tmdev->reset); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, sun8i_ths_reset_control_assert, + tmdev->reset); + if (ret) + return ret; + + tmdev->bus_clk = devm_clk_get_enabled(&pdev->dev, "bus"); if (IS_ERR(tmdev->bus_clk)) return PTR_ERR(tmdev->bus_clk); } if (tmdev->chip->has_mod_clk) { - tmdev->mod_clk = devm_clk_get(&pdev->dev, "mod"); + tmdev->mod_clk = devm_clk_get_enabled(&pdev->dev, "mod"); if (IS_ERR(tmdev->mod_clk)) return PTR_ERR(tmdev->mod_clk); } - ret = reset_control_deassert(tmdev->reset); + ret = clk_set_rate(tmdev->mod_clk, 24000000); if (ret) return ret; - ret = clk_prepare_enable(tmdev->bus_clk); - if (ret) - goto assert_reset; - - ret = clk_set_rate(tmdev->mod_clk, 24000000); - if (ret) - goto bus_disable; - - ret = clk_prepare_enable(tmdev->mod_clk); - if (ret) - goto bus_disable; - ret = sun8i_ths_calibrate(tmdev); if (ret) - goto mod_disable; + return ret; return 0; - -mod_disable: - clk_disable_unprepare(tmdev->mod_clk); -bus_disable: - clk_disable_unprepare(tmdev->bus_clk); -assert_reset: - reset_control_assert(tmdev->reset); - - return ret; } static int sun8i_h3_thermal_init(struct ths_device *tmdev) @@ -529,17 +522,6 @@ static int sun8i_ths_probe(struct platform_device *pdev) return 0; } -static int sun8i_ths_remove(struct platform_device *pdev) -{ - struct ths_device *tmdev = platform_get_drvdata(pdev); - - clk_disable_unprepare(tmdev->mod_clk); - clk_disable_unprepare(tmdev->bus_clk); - reset_control_assert(tmdev->reset); - - return 0; -} - static const struct ths_thermal_chip sun8i_a83t_ths = { .sensor_num = 3, .scale = 705, @@ -641,7 +623,6 @@ MODULE_DEVICE_TABLE(of, of_ths_match); static struct platform_driver ths_driver = { .probe = sun8i_ths_probe, - .remove = sun8i_ths_remove, .driver = { .name = "sun8i-thermal", .of_match_table = of_ths_match, From 8cd9917c13a70a82368b086a51c0948694be230b Mon Sep 17 00:00:00 2001 From: "Jiangong.Han" Date: Tue, 22 Jun 2021 18:37:08 +0800 Subject: [PATCH 031/509] rcuscale: Console output claims too few grace periods [ Upstream commit 811192c5f24bfd7246ce9ce06f668d8c408bf39b ] The rcuscale console output claims N grace periods, numbered from zero to N, which means that there were really N+1 grace periods. The root cause of this bug is that rcu_scale_writer() stores the number of the last grace period (numbered from zero) into writer_n_durations[me] instead of the number of grace periods. This commit therefore assigns the actual number of grace periods to writer_n_durations[me], and also makes the corresponding adjustment to the loop outputting per-grace-period measurements. Sample of old console output: rcu-scale: writer 0 gps: 133 ...... rcu-scale: 0 writer-duration: 0 44003961 rcu-scale: 0 writer-duration: 1 32003582 ...... rcu-scale: 0 writer-duration: 132 28004391 rcu-scale: 0 writer-duration: 133 27996410 Sample of new console output: rcu-scale: writer 0 gps: 134 ...... rcu-scale: 0 writer-duration: 0 44003961 rcu-scale: 0 writer-duration: 1 32003582 ...... rcu-scale: 0 writer-duration: 132 28004391 rcu-scale: 0 writer-duration: 133 27996410 Signed-off-by: Jiangong.Han Signed-off-by: Paul E. McKenney Stable-dep-of: 23fc8df26dea ("rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale") Signed-off-by: Sasha Levin --- kernel/rcu/rcuscale.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 2819b95479af..28bc688e2705 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -457,7 +457,7 @@ rcu_scale_writer(void *arg) if (gp_async) { cur_ops->gp_barrier(); } - writer_n_durations[me] = i_max; + writer_n_durations[me] = i_max + 1; torture_kthread_stopping("rcu_scale_writer"); return 0; } @@ -531,7 +531,7 @@ rcu_scale_cleanup(void) wdpp = writer_durations[i]; if (!wdpp) continue; - for (j = 0; j <= writer_n_durations[i]; j++) { + for (j = 0; j < writer_n_durations[i]; j++) { wdp = &wdpp[j]; pr_alert("%s%s %4d writer-duration: %5d %llu\n", scale_type, SCALE_FLAG, From b62c816bdb5e00347f4618fc122a8d2bd79e676a Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Fri, 29 Oct 2021 17:40:28 +0800 Subject: [PATCH 032/509] rcuscale: Always log error message [ Upstream commit 86e7ed1bd57d020e35d430542bf5d689c3200568 ] Unconditionally log messages corresponding to errors. Acked-by: Davidlohr Bueso Signed-off-by: Li Zhijian Signed-off-by: Paul E. McKenney Stable-dep-of: 23fc8df26dea ("rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale") Signed-off-by: Sasha Levin --- kernel/rcu/rcuscale.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 28bc688e2705..4452c3c4060c 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -49,8 +49,8 @@ MODULE_AUTHOR("Paul E. McKenney "); pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s) #define VERBOSE_SCALEOUT_STRING(s) \ do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0) -#define VERBOSE_SCALEOUT_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s); } while (0) +#define SCALEOUT_ERRSTRING(s) \ + pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s) /* * The intended use cases for the nreaders and nwriters module parameters @@ -484,11 +484,11 @@ rcu_scale_cleanup(void) * during the mid-boot phase, so have to wait till the end. */ if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) - VERBOSE_SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); + SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); if (rcu_gp_is_normal() && gp_exp) - VERBOSE_SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); if (gp_exp && gp_async) - VERBOSE_SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); + SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); if (torture_cleanup_begin()) return; @@ -803,7 +803,7 @@ rcu_scale_init(void) reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - VERBOSE_SCALEOUT_ERRSTRING("out of memory"); + SCALEOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } @@ -823,7 +823,7 @@ rcu_scale_init(void) kcalloc(nrealwriters, sizeof(*writer_n_durations), GFP_KERNEL); if (!writer_tasks || !writer_durations || !writer_n_durations) { - VERBOSE_SCALEOUT_ERRSTRING("out of memory"); + SCALEOUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } From ecc5e6dbc26941804433a949c3271757b4e8d3c9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 31 Jan 2023 12:08:54 -0800 Subject: [PATCH 033/509] rcuscale: Move shutdown from wait_event() to wait_event_idle() [ Upstream commit ef1ef3d47677dc191b88650a9f7f91413452cc1b ] The rcu_scale_shutdown() and kfree_scale_shutdown() kthreads/functions use wait_event() to wait for the rcuscale test to complete. However, each updater thread in such a test waits for at least 100 grace periods. If each grace period takes more than 1.2 seconds, which is long, but not insanely so, this can trigger the hung-task timeout. This commit therefore replaces those wait_event() calls with calls to wait_event_idle(), which do not trigger the hung-task timeout. Reported-by: kernel test robot Reported-by: Liam Howlett Signed-off-by: Paul E. McKenney Tested-by: Yujie Liu Signed-off-by: Boqun Feng Stable-dep-of: 23fc8df26dea ("rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale") Signed-off-by: Sasha Levin --- kernel/rcu/rcuscale.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 4452c3c4060c..74be0b6438fb 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -579,8 +579,7 @@ static int compute_real(int n) static int rcu_scale_shutdown(void *arg) { - wait_event(shutdown_wq, - atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); + wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); smp_mb(); /* Wake before output. */ rcu_scale_cleanup(); kernel_power_off(); @@ -693,8 +692,8 @@ kfree_scale_cleanup(void) static int kfree_scale_shutdown(void *arg) { - wait_event(shutdown_wq, - atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); + wait_event_idle(shutdown_wq, + atomic_read(&n_kfree_scale_thread_ended) >= kfree_nrealthreads); smp_mb(); /* Wake before output. */ From d414e24d150943b1f115f740a3df5b8549e48966 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 22 Mar 2023 19:42:40 +0800 Subject: [PATCH 034/509] rcu/rcuscale: Move rcu_scale_*() after kfree_scale_cleanup() [ Upstream commit bf5ddd736509a7d9077c0b6793e6f0852214dbea ] This code-movement-only commit moves the rcu_scale_cleanup() and rcu_scale_shutdown() functions to follow kfree_scale_cleanup(). This is code movement is in preparation for a bug-fix patch that invokes kfree_scale_cleanup() from rcu_scale_cleanup(). Signed-off-by: Qiuxu Zhuo Signed-off-by: Paul E. McKenney Reviewed-by: Joel Fernandes (Google) Stable-dep-of: 23fc8df26dea ("rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale") Signed-off-by: Sasha Levin --- kernel/rcu/rcuscale.c | 194 +++++++++++++++++++++--------------------- 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 74be0b6438fb..dfc6172ffe1d 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -470,89 +470,6 @@ rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag) scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown); } -static void -rcu_scale_cleanup(void) -{ - int i; - int j; - int ngps = 0; - u64 *wdp; - u64 *wdpp; - - /* - * Would like warning at start, but everything is expedited - * during the mid-boot phase, so have to wait till the end. - */ - if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) - SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); - if (rcu_gp_is_normal() && gp_exp) - SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); - if (gp_exp && gp_async) - SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); - - if (torture_cleanup_begin()) - return; - if (!cur_ops) { - torture_cleanup_end(); - return; - } - - if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) - torture_stop_kthread(rcu_scale_reader, - reader_tasks[i]); - kfree(reader_tasks); - } - - if (writer_tasks) { - for (i = 0; i < nrealwriters; i++) { - torture_stop_kthread(rcu_scale_writer, - writer_tasks[i]); - if (!writer_n_durations) - continue; - j = writer_n_durations[i]; - pr_alert("%s%s writer %d gps: %d\n", - scale_type, SCALE_FLAG, i, j); - ngps += j; - } - pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n", - scale_type, SCALE_FLAG, - t_rcu_scale_writer_started, t_rcu_scale_writer_finished, - t_rcu_scale_writer_finished - - t_rcu_scale_writer_started, - ngps, - rcuscale_seq_diff(b_rcu_gp_test_finished, - b_rcu_gp_test_started)); - for (i = 0; i < nrealwriters; i++) { - if (!writer_durations) - break; - if (!writer_n_durations) - continue; - wdpp = writer_durations[i]; - if (!wdpp) - continue; - for (j = 0; j < writer_n_durations[i]; j++) { - wdp = &wdpp[j]; - pr_alert("%s%s %4d writer-duration: %5d %llu\n", - scale_type, SCALE_FLAG, - i, j, *wdp); - if (j % 100 == 0) - schedule_timeout_uninterruptible(1); - } - kfree(writer_durations[i]); - } - kfree(writer_tasks); - kfree(writer_durations); - kfree(writer_n_durations); - } - - /* Do torture-type-specific cleanup operations. */ - if (cur_ops->cleanup != NULL) - cur_ops->cleanup(); - - torture_cleanup_end(); -} - /* * Return the number if non-negative. If -1, the number of CPUs. * If less than -1, that much less than the number of CPUs, but @@ -572,20 +489,6 @@ static int compute_real(int n) return nr; } -/* - * RCU scalability shutdown kthread. Just waits to be awakened, then shuts - * down system. - */ -static int -rcu_scale_shutdown(void *arg) -{ - wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); - smp_mb(); /* Wake before output. */ - rcu_scale_cleanup(); - kernel_power_off(); - return -EINVAL; -} - /* * kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs for number * of iterations and measure total time and number of GP for all iterations to complete. @@ -747,6 +650,103 @@ kfree_scale_init(void) return firsterr; } +static void +rcu_scale_cleanup(void) +{ + int i; + int j; + int ngps = 0; + u64 *wdp; + u64 *wdpp; + + /* + * Would like warning at start, but everything is expedited + * during the mid-boot phase, so have to wait till the end. + */ + if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) + SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!"); + if (rcu_gp_is_normal() && gp_exp) + SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!"); + if (gp_exp && gp_async) + SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); + + if (torture_cleanup_begin()) + return; + if (!cur_ops) { + torture_cleanup_end(); + return; + } + + if (reader_tasks) { + for (i = 0; i < nrealreaders; i++) + torture_stop_kthread(rcu_scale_reader, + reader_tasks[i]); + kfree(reader_tasks); + } + + if (writer_tasks) { + for (i = 0; i < nrealwriters; i++) { + torture_stop_kthread(rcu_scale_writer, + writer_tasks[i]); + if (!writer_n_durations) + continue; + j = writer_n_durations[i]; + pr_alert("%s%s writer %d gps: %d\n", + scale_type, SCALE_FLAG, i, j); + ngps += j; + } + pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n", + scale_type, SCALE_FLAG, + t_rcu_scale_writer_started, t_rcu_scale_writer_finished, + t_rcu_scale_writer_finished - + t_rcu_scale_writer_started, + ngps, + rcuscale_seq_diff(b_rcu_gp_test_finished, + b_rcu_gp_test_started)); + for (i = 0; i < nrealwriters; i++) { + if (!writer_durations) + break; + if (!writer_n_durations) + continue; + wdpp = writer_durations[i]; + if (!wdpp) + continue; + for (j = 0; j < writer_n_durations[i]; j++) { + wdp = &wdpp[j]; + pr_alert("%s%s %4d writer-duration: %5d %llu\n", + scale_type, SCALE_FLAG, + i, j, *wdp); + if (j % 100 == 0) + schedule_timeout_uninterruptible(1); + } + kfree(writer_durations[i]); + } + kfree(writer_tasks); + kfree(writer_durations); + kfree(writer_n_durations); + } + + /* Do torture-type-specific cleanup operations. */ + if (cur_ops->cleanup != NULL) + cur_ops->cleanup(); + + torture_cleanup_end(); +} + +/* + * RCU scalability shutdown kthread. Just waits to be awakened, then shuts + * down system. + */ +static int +rcu_scale_shutdown(void *arg) +{ + wait_event_idle(shutdown_wq, atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters); + smp_mb(); /* Wake before output. */ + rcu_scale_cleanup(); + kernel_power_off(); + return -EINVAL; +} + static int __init rcu_scale_init(void) { From 604d6a5ff718874904b0fe614878a42b42c0d699 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Wed, 22 Mar 2023 19:42:41 +0800 Subject: [PATCH 035/509] rcu/rcuscale: Stop kfree_scale_thread thread(s) after unloading rcuscale [ Upstream commit 23fc8df26dead16687ae6eb47b0561a4a832e2f6 ] Running the 'kfree_rcu_test' test case [1] results in a splat [2]. The root cause is the kfree_scale_thread thread(s) continue running after unloading the rcuscale module. This commit fixes that isue by invoking kfree_scale_cleanup() from rcu_scale_cleanup() when removing the rcuscale module. [1] modprobe rcuscale kfree_rcu_test=1 // After some time rmmod rcuscale rmmod torture [2] BUG: unable to handle page fault for address: ffffffffc0601a87 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD 11de4f067 P4D 11de4f067 PUD 11de51067 PMD 112f4d067 PTE 0 Oops: 0010 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 1798 Comm: kfree_scale_thr Not tainted 6.3.0-rc1-rcu+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 RIP: 0010:0xffffffffc0601a87 Code: Unable to access opcode bytes at 0xffffffffc0601a5d. RSP: 0018:ffffb25bc2e57e18 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffffffffc061f0b6 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff962fd0de RDI: ffffffff962fd0de RBP: ffffb25bc2e57ea8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000000 R14: 000000000000000a R15: 00000000001c1dbe FS: 0000000000000000(0000) GS:ffff921fa2200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffc0601a5d CR3: 000000011de4c006 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? kvfree_call_rcu+0xf0/0x3a0 ? kthread+0xf3/0x120 ? kthread_complete_and_exit+0x20/0x20 ? ret_from_fork+0x1f/0x30 Modules linked in: rfkill sunrpc ... [last unloaded: torture] CR2: ffffffffc0601a87 ---[ end trace 0000000000000000 ]--- Fixes: e6e78b004fa7 ("rcuperf: Add kfree_rcu() performance Tests") Reviewed-by: Davidlohr Bueso Reviewed-by: Joel Fernandes (Google) Signed-off-by: Qiuxu Zhuo Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- kernel/rcu/rcuscale.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index dfc6172ffe1d..6c05365ed80f 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -670,6 +670,11 @@ rcu_scale_cleanup(void) if (gp_exp && gp_async) SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!"); + if (kfree_rcu_test) { + kfree_scale_cleanup(); + return; + } + if (torture_cleanup_begin()) return; if (!cur_ops) { From 365f546de5848795543acc8e177cf84b0f32b46d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 May 2023 16:30:01 +0530 Subject: [PATCH 036/509] perf/ibs: Fix interface via core pmu events [ Upstream commit 2fad201fe38ff9a692acedb1990ece2c52a29f95 ] Although, IBS pmus can be invoked via their own interface, indirect IBS invocation via core pmu events is also supported with fixed set of events: cpu-cycles:p, r076:p (same as cpu-cycles:p) and r0C1:p (micro-ops) for user convenience. This indirect IBS invocation is broken since commit 66d258c5b048 ("perf/core: Optimize perf_init_event()"), which added RAW pmu under 'pmu_idr' list and thus if event_init() fails with RAW pmu, it started returning error instead of trying other pmus. Forward precise events from core pmu to IBS by overwriting 'type' and 'config' in the kernel copy of perf_event_attr. Overwriting will cause perf_init_event() to retry with updated 'type' and 'config', which will automatically forward event to IBS pmu. Without patch: $ sudo ./perf record -C 0 -e r076:p -- sleep 1 Error: The r076:p event is not supported. With patch: $ sudo ./perf record -C 0 -e r076:p -- sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.341 MB perf.data (37 samples) ] Fixes: 66d258c5b048 ("perf/core: Optimize perf_init_event()") Reported-by: Stephane Eranian Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230504110003.2548-3-ravi.bangoria@amd.com Signed-off-by: Sasha Levin --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/amd/ibs.c | 53 +++++++++++++++---------------- arch/x86/include/asm/perf_event.h | 2 ++ 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 52eba415928a..afc955340f81 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -364,7 +364,7 @@ static int amd_pmu_hw_config(struct perf_event *event) /* pass precise event sampling to ibs: */ if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + return forward_event_to_ibs(event); if (has_branch_stack(event)) return -EOPNOTSUPP; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 8a85658a24cc..354d52e17ef5 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -202,7 +202,7 @@ static struct perf_ibs *get_ibs_pmu(int type) } /* - * Use IBS for precise event sampling: + * core pmu config -> IBS config * * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count * perf record -a -e r076:p ... # same as -e cpu-cycles:p @@ -211,25 +211,9 @@ static struct perf_ibs *get_ibs_pmu(int type) * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, * MSRC001_1033) is used to select either cycle or micro-ops counting * mode. - * - * The rip of IBS samples has skid 0. Thus, IBS supports precise - * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the - * rip is invalid when IBS was not able to record the rip correctly. - * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. - * */ -static int perf_ibs_precise_event(struct perf_event *event, u64 *config) +static int core_pmu_ibs_config(struct perf_event *event, u64 *config) { - switch (event->attr.precise_ip) { - case 0: - return -ENOENT; - case 1: - case 2: - break; - default: - return -EOPNOTSUPP; - } - switch (event->attr.type) { case PERF_TYPE_HARDWARE: switch (event->attr.config) { @@ -255,22 +239,37 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config) return -EOPNOTSUPP; } +/* + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. + */ +int forward_event_to_ibs(struct perf_event *event) +{ + u64 config = 0; + + if (!event->attr.precise_ip || event->attr.precise_ip > 2) + return -EOPNOTSUPP; + + if (!core_pmu_ibs_config(event, &config)) { + event->attr.type = perf_ibs_op.pmu.type; + event->attr.config = config; + } + return -ENOENT; +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; u64 max_cnt, config; - int ret; perf_ibs = get_ibs_pmu(event->attr.type); - if (perf_ibs) { - config = event->attr.config; - } else { - perf_ibs = &perf_ibs_op; - ret = perf_ibs_precise_event(event, &config); - if (ret) - return ret; - } + if (!perf_ibs) + return -ENOENT; + + config = event->attr.config; if (event->pmu != &perf_ibs->pmu) return -ENOENT; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index b9a7fd0a27e2..a4e4bbb7795d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -412,8 +412,10 @@ struct pebs_xmm { #ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +extern int forward_event_to_ibs(struct perf_event *event); #else static inline u32 get_ibs_caps(void) { return 0; } +static inline int forward_event_to_ibs(struct perf_event *event) { return -ENOENT; } #endif #ifdef CONFIG_PERF_EVENTS From cba85e1cb79f089f13e4b2cca6d66659b38567fa Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 6 Mar 2023 13:32:59 +0100 Subject: [PATCH 037/509] x86/mm: Fix __swp_entry_to_pte() for Xen PV guests [ Upstream commit 0f88130e8a6fd185b0aeb5d8e286083735f2585a ] Normally __swp_entry_to_pte() is never called with a value translating to a valid PTE. The only known exception is pte_swap_tests(), resulting in a WARN splat in Xen PV guests, as __pte_to_swp_entry() did translate the PFN of the valid PTE to a guest local PFN, while __swp_entry_to_pte() doesn't do the opposite translation. Fix that by using __pte() in __swp_entry_to_pte() instead of open coding the native variant of it. For correctness do the similar conversion for __swp_entry_to_pmd(). Fixes: 05289402d717 ("mm/debug_vm_pgtable: add tests validating arch helpers for core MM features") Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230306123259.12461-1-jgross@suse.com Signed-off-by: Sasha Levin --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 56d0399a0cd1..dd520b44e89c 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -235,8 +235,8 @@ static inline void native_pgd_clear(pgd_t *pgd) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) -#define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) +#define __swp_entry_to_pte(x) (__pte((x).val)) +#define __swp_entry_to_pmd(x) (__pmd((x).val)) extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); From 16ec59c03ad258b716374946a6b1530921d0faf3 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Mon, 6 Mar 2023 11:40:36 +0100 Subject: [PATCH 038/509] evm: Complete description of evm_inode_setattr() [ Upstream commit b1de86d4248b273cb12c4cd7d20c08d459519f7d ] Add the description for missing parameters of evm_inode_setattr() to avoid the warning arising with W=n compile option. Fixes: 817b54aa45db ("evm: add evm_inode_setattr to prevent updating an invalid security.evm") # v3.2+ Fixes: c1632a0f1120 ("fs: port ->setattr() to pass mnt_idmap") # v6.3+ Signed-off-by: Roberto Sassu Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/evm/evm_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 0033364ac404..8cfc49fa4df5 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -472,7 +472,9 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) /** * evm_inode_setattr - prevent updating an invalid EVM extended attribute + * @idmap: idmap of the mount * @dentry: pointer to the affected dentry + * @attr: iattr structure containing the new file attributes * * Permit update of file attributes when files have a valid EVM signature, * except in the case of them having an immutable portable signature. From 628709a05708918d17beddc9440b2d64320929c0 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 6 Jun 2023 09:41:13 +0200 Subject: [PATCH 039/509] ima: Fix build warnings [ Upstream commit 95526d13038c2bbddd567a4d8e39fac42484e182 ] Fix build warnings (function parameters description) for ima_collect_modsig(), ima_match_policy() and ima_parse_add_rule(). Fixes: 15588227e086 ("ima: Collect modsig") # v5.4+ Fixes: 2fe5d6def167 ("ima: integrity appraisal extension") # v5.14+ Fixes: 4af4662fa4a9 ("integrity: IMA policy") # v3.2+ Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin --- security/integrity/ima/ima_modsig.c | 3 +++ security/integrity/ima/ima_policy.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_modsig.c b/security/integrity/ima/ima_modsig.c index fb25723c65bc..3e7bee30080f 100644 --- a/security/integrity/ima/ima_modsig.c +++ b/security/integrity/ima/ima_modsig.c @@ -89,6 +89,9 @@ int ima_read_modsig(enum ima_hooks func, const void *buf, loff_t buf_len, /** * ima_collect_modsig - Calculate the file hash without the appended signature. + * @modsig: parsed module signature + * @buf: data to verify the signature on + * @size: data size * * Since the modsig is part of the file contents, the hash used in its signature * isn't the same one ordinarily calculated by IMA. Therefore PKCS7 code diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 96ecb7d25403..1c403e8a8044 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -628,6 +628,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func) * @secid: LSM secid of the task to be validated * @func: IMA hook identifier * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC) + * @flags: IMA actions to consider (e.g. IMA_MEASURE | IMA_APPRAISE) * @pcr: set the pcr to extend * @template_desc: the template that should be used for this rule * @keyring: the keyring name, if given, to be used to check in the policy. @@ -1515,7 +1516,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) /** * ima_parse_add_rule - add a rule to ima_policy_rules - * @rule - ima measurement policy rule + * @rule: ima measurement policy rule * * Avoid locking by allowing just one writer at a time in ima_write_policy() * Returns the length of the rule parsed, an error code on failure From a14cb307267ba7a1715403e071bdc4deda77eef5 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 14 Jun 2023 17:37:33 +0800 Subject: [PATCH 040/509] pstore/ram: Add check for kstrdup [ Upstream commit d97038d5ec2062733c1e016caf9baaf68cf64ea1 ] Add check for the return value of kstrdup() and return the error if it fails in order to avoid NULL pointer dereference. Fixes: e163fdb3f7f8 ("pstore/ram: Regularize prz label allocation lifetime") Signed-off-by: Jiasheng Jiang Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230614093733.36048-1-jiasheng@iscas.ac.cn Signed-off-by: Sasha Levin --- fs/pstore/ram_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 184cb97c83bd..b6183f1f4ebc 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -577,6 +577,8 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size, raw_spin_lock_init(&prz->buffer_lock); prz->flags = flags; prz->label = kstrdup(label, GFP_KERNEL); + if (!prz->label) + goto err; ret = persistent_ram_buffer_map(start, size, prz, memtype); if (ret) From cbd0f41a5362a1500eec3b3d79fbe8c1b74bf46d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:42 +0200 Subject: [PATCH 041/509] igc: Enable and fix RX hash usage by netstack [ Upstream commit 84214ab4689f962b4bfc47fc9a5838d25ac4274d ] When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372 ("igc: Add transmit and receive fastpath and interrupt handlers"), the hardware wasn't configured to provide RSS hash, thus it made sense to not enable net_device NETIF_F_RXHASH feature bit. The NIC hardware was configured to enable RSS hash info in v5.2 via commit 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but forgot to set the NETIF_F_RXHASH feature bit. The original implementation of igc_rx_hash() didn't extract the associated pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of this patch are about extracting the RSS Type from the hardware and mapping this to enum pkt_hash_types. This was based on Foxville i225 software user manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03). For UDP it's worth noting that RSS (type) hashing have been disabled both for IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP) because hardware RSS doesn't handle fragmented pkts well when enabled (can cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have the effect that netstack will do a software based hash calc calling into flow_dissect, but only when code calls skb_get_hash(), which doesn't necessary happen for local delivery. For QA verification testing I wrote a small bpftrace prog: [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182464270.616355.11391652654430626584.stgit@firesoul Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc.h | 28 ++++++++++++++++++++ drivers/net/ethernet/intel/igc/igc_main.c | 31 ++++++++++++++++++++--- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 970dd878d8a7..47ba1eafcdc7 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "igc_hw.h" @@ -272,6 +273,33 @@ extern char igc_driver_name[]; #define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 #define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +/* RX-desc Write-Back format RSS Type's */ +enum igc_rss_type_num { + IGC_RSS_TYPE_NO_HASH = 0, + IGC_RSS_TYPE_HASH_TCP_IPV4 = 1, + IGC_RSS_TYPE_HASH_IPV4 = 2, + IGC_RSS_TYPE_HASH_TCP_IPV6 = 3, + IGC_RSS_TYPE_HASH_IPV6_EX = 4, + IGC_RSS_TYPE_HASH_IPV6 = 5, + IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6, + IGC_RSS_TYPE_HASH_UDP_IPV4 = 7, + IGC_RSS_TYPE_HASH_UDP_IPV6 = 8, + IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9, + IGC_RSS_TYPE_MAX = 10, +}; +#define IGC_RSS_TYPE_MAX_TABLE 16 +#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */ + +/* igc_rss_type - Rx descriptor RSS type field */ +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) +{ + /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved) + * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info) + * is slightly slower than via u32 (wb.lower.lo_dword.data) + */ + return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK); +} + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_4K_ITR 980 diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 3aa0efb542aa..72d7d2cf126d 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1569,14 +1569,36 @@ static void igc_rx_checksum(struct igc_ring *ring, le32_to_cpu(rx_desc->wb.upper.status_error)); } +/* Mapping HW RSS Type to enum pkt_hash_types */ +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, + [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ + [13] = PKT_HASH_TYPE_NONE, + [14] = PKT_HASH_TYPE_NONE, + [15] = PKT_HASH_TYPE_NONE, +}; + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (ring->netdev->features & NETIF_F_RXHASH) - skb_set_hash(skb, - le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); + if (ring->netdev->features & NETIF_F_RXHASH) { + u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + u32 rss_type = igc_rss_type(rx_desc); + + skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); + } } /** @@ -5257,6 +5279,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_TSO_ECN; + netdev->features |= NETIF_F_RXHASH; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; From 0f3f41b47533179d027c433e8f30dea5b6ebe833 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Wed, 26 Apr 2023 17:35:00 +0300 Subject: [PATCH 042/509] wifi: ath9k: fix AR9003 mac hardware hang check register offset calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3e56c80931c7615250fe4bf83f93b57881969266 ] Fix ath9k_hw_verify_hang()/ar9003_hw_detect_mac_hang() register offset calculation (do not overflow the shift for the second register/queues above five, use the register layout described in the comments above ath9k_hw_verify_hang() instead). Fixes: 222e04830ff0 ("ath9k: Fix MAC HW hang check for AR9003") Reported-by: Gregg Wonderly Link: https://lore.kernel.org/linux-wireless/E3A9C354-0CB7-420C-ADEF-F0177FB722F4@seqtechllc.com/ Signed-off-by: Peter Seiderer Acked-by: Toke HĆøiland-JĆørgensen Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230422212423.26065-1-ps.report@gmx.net Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/ar9003_hw.c | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c index 42f00a2a8c80..cf5648188459 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c @@ -1099,17 +1099,22 @@ static bool ath9k_hw_verify_hang(struct ath_hw *ah, unsigned int queue) { u32 dma_dbg_chain, dma_dbg_complete; u8 dcu_chain_state, dcu_complete_state; + unsigned int dbg_reg, reg_offset; int i; - for (i = 0; i < NUM_STATUS_READS; i++) { - if (queue < 6) - dma_dbg_chain = REG_READ(ah, AR_DMADBG_4); - else - dma_dbg_chain = REG_READ(ah, AR_DMADBG_5); + if (queue < 6) { + dbg_reg = AR_DMADBG_4; + reg_offset = queue * 5; + } else { + dbg_reg = AR_DMADBG_5; + reg_offset = (queue - 6) * 5; + } + for (i = 0; i < NUM_STATUS_READS; i++) { + dma_dbg_chain = REG_READ(ah, dbg_reg); dma_dbg_complete = REG_READ(ah, AR_DMADBG_6); - dcu_chain_state = (dma_dbg_chain >> (5 * queue)) & 0x1f; + dcu_chain_state = (dma_dbg_chain >> reg_offset) & 0x1f; dcu_complete_state = dma_dbg_complete & 0x3; if ((dcu_chain_state != 0x6) || (dcu_complete_state != 0x1)) @@ -1128,6 +1133,7 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah) u8 dcu_chain_state, dcu_complete_state; bool dcu_wait_frdone = false; unsigned long chk_dcu = 0; + unsigned int reg_offset; unsigned int i = 0; dma_dbg_4 = REG_READ(ah, AR_DMADBG_4); @@ -1139,12 +1145,15 @@ static bool ar9003_hw_detect_mac_hang(struct ath_hw *ah) goto exit; for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) { - if (i < 6) + if (i < 6) { chk_dbg = dma_dbg_4; - else + reg_offset = i * 5; + } else { chk_dbg = dma_dbg_5; + reg_offset = (i - 6) * 5; + } - dcu_chain_state = (chk_dbg >> (5 * i)) & 0x1f; + dcu_chain_state = (chk_dbg >> reg_offset) & 0x1f; if (dcu_chain_state == 0x6) { dcu_wait_frdone = true; chk_dcu |= BIT(i); From 250efb4d3f5b32a115ea6bf25437ba44a1b3c04f Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 26 Apr 2023 17:35:01 +0300 Subject: [PATCH 043/509] wifi: ath9k: avoid referencing uninit memory in ath9k_wmi_ctrl_rx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f24292e827088bba8de7158501ac25a59b064953 ] For the reasons also described in commit b383e8abed41 ("wifi: ath9k: avoid uninit memory read in ath9k_htc_rx_msg()"), ath9k_htc_rx_msg() should validate pkt_len before accessing the SKB. For example, the obtained SKB may have been badly constructed with pkt_len = 8. In this case, the SKB can only contain a valid htc_frame_hdr but after being processed in ath9k_htc_rx_msg() and passed to ath9k_wmi_ctrl_rx() endpoint RX handler, it is expected to have a WMI command header which should be located inside its data payload. Implement sanity checking inside ath9k_wmi_ctrl_rx(). Otherwise, uninit memory can be referenced. Tested on Qualcomm Atheros Communications AR9271 802.11n . Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: fb9987d0f748 ("ath9k_htc: Support for AR9271 chipset.") Reported-and-tested-by: syzbot+f2cb6e0ffdb961921e4d@syzkaller.appspotmail.com Signed-off-by: Fedor Pchelkin Acked-by: Toke HĆøiland-JĆørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230424183348.111355-1-pchelkin@ispras.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/wmi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 19345b8f7bfd..d652c647d56b 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -221,6 +221,10 @@ static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb, if (unlikely(wmi->stopped)) goto free_skb; + /* Validate the obtained SKB. */ + if (unlikely(skb->len < sizeof(struct wmi_cmd_hdr))) + goto free_skb; + hdr = (struct wmi_cmd_hdr *) skb->data; cmd_id = be16_to_cpu(hdr->command_id); From bd3e880dce27d225598730d2bbb3dc05b443af22 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Fri, 5 May 2023 16:50:58 +0800 Subject: [PATCH 044/509] samples/bpf: Fix buffer overflow in tcp_basertt [ Upstream commit f4dea9689c5fea3d07170c2cb0703e216f1a0922 ] Using sizeof(nv) or strlen(nv)+1 is correct. Fixes: c890063e4404 ("bpf: sample BPF_SOCKET_OPS_BASE_RTT program") Signed-off-by: Pengcheng Yang Link: https://lore.kernel.org/r/1683276658-2860-1-git-send-email-yangpc@wangsu.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- samples/bpf/tcp_basertt_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c index 8dfe09a92fec..822b0742b815 100644 --- a/samples/bpf/tcp_basertt_kern.c +++ b/samples/bpf/tcp_basertt_kern.c @@ -47,7 +47,7 @@ int bpf_basertt(struct bpf_sock_ops *skops) case BPF_SOCK_OPS_BASE_RTT: n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION, cong, sizeof(cong)); - if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) { + if (!n && !__builtin_memcmp(cong, nv, sizeof(nv))) { /* Set base_rtt to 80us */ rv = 80; } else if (n) { From 795ef550307c02a4e15b7b81e7b243c51e6df317 Mon Sep 17 00:00:00 2001 From: Vijaya Krishna Nivarthi Date: Tue, 25 Apr 2023 14:12:08 +0530 Subject: [PATCH 045/509] spi: spi-geni-qcom: Correct CS_TOGGLE bit in SPI_TRANS_CFG [ Upstream commit 5fd7c99ecf45c8ee8a9b1268f0ffc91cc6271da2 ] The CS_TOGGLE bit when set is supposed to instruct FW to toggle CS line between words. The driver with intent of disabling this behaviour has been unsetting BIT(0). This has not caused any trouble so far because the original BIT(1) is untouched and BIT(0) likely wasn't being used. Correct this to prevent a potential future bug. Signed-off-by: Vijaya Krishna Nivarthi --- drivers/spi/spi-geni-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 01ef79f15b02..be259c685cc8 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -32,7 +32,7 @@ #define CS_DEMUX_OUTPUT_SEL GENMASK(3, 0) #define SE_SPI_TRANS_CFG 0x25c -#define CS_TOGGLE BIT(0) +#define CS_TOGGLE BIT(1) #define SE_SPI_WORD_LEN 0x268 #define WORD_LEN_MSK GENMASK(9, 0) From 3ae910a375b67e51787978f224f5c4466cd4aa6e Mon Sep 17 00:00:00 2001 From: Amisha Patel Date: Fri, 21 Apr 2023 18:10:20 +0000 Subject: [PATCH 046/509] wifi: wilc1000: fix for absent RSN capabilities WFA testcase [ Upstream commit 9ce4bb09123e9754996e358bd808d39f5d112899 ] Mandatory WFA testcase CT_Security_WPA2Personal_STA_RSNEBoundsVerification-AbsentRSNCap, performs bounds verfication on Beacon and/or Probe response frames. It failed and observed the reason to be absence of cipher suite and AKM suite in RSN information. To fix this, enable the RSN flag before extracting RSN capabilities. Fixes: cd21d99e595e ("wifi: wilc1000: validate pairwise and authentication suite offsets") Signed-off-by: Amisha Patel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230421181005.4865-1-amisha.patel@microchip.com Signed-off-by: Sasha Levin --- drivers/net/wireless/microchip/wilc1000/hif.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index b25847799138..884f45e627a7 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -470,6 +470,9 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; + param->mode_802_11i = 2; + param->rsn_found = true; + /* extract RSN capabilities */ if (offset < rsn_ie_len) { /* skip over pairwise suites */ @@ -479,11 +482,8 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, /* skip over authentication suites */ offset += (rsn_ie[offset] * 4) + 2; - if (offset + 1 < rsn_ie_len) { - param->mode_802_11i = 2; - param->rsn_found = true; + if (offset + 1 < rsn_ie_len) memcpy(param->rsn_cap, &rsn_ie[offset], 2); - } } } } From c62e2ac02e288b7e0039037e8e2e9759dafe8acf Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 May 2023 15:53:15 +0200 Subject: [PATCH 047/509] wifi: mwifiex: Fix the size of a memory allocation in mwifiex_ret_802_11_scan() [ Upstream commit d9aef04fcfa81ee4fb2804a21a3712b7bbd936af ] The type of "mwifiex_adapter->nd_info" is "struct cfg80211_wowlan_nd_info", not "struct cfg80211_wowlan_nd_match". Use struct_size() to ease the computation of the needed size. The current code over-allocates some memory, so is safe. But it wastes 32 bytes. Fixes: 7d7f07d8c5d3 ("mwifiex: add wowlan net-detect support") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7a6074fb056d2181e058a3cc6048d8155c20aec7.1683371982.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/scan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index c2a685f63e95..78ef40e315b5 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2200,9 +2200,9 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, if (nd_config) { adapter->nd_info = - kzalloc(sizeof(struct cfg80211_wowlan_nd_match) + - sizeof(struct cfg80211_wowlan_nd_match *) * - scan_rsp->number_of_sets, GFP_ATOMIC); + kzalloc(struct_size(adapter->nd_info, matches, + scan_rsp->number_of_sets), + GFP_ATOMIC); if (adapter->nd_info) adapter->nd_info->n_matches = scan_rsp->number_of_sets; From 08f61a34913558e06576e7b6318bf583f273c1be Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 15 Jan 2021 08:34:59 -0800 Subject: [PATCH 048/509] bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE [ Upstream commit 9cacf81f8161111db25f98e78a7a0e32ae142b3f ] Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have confliciting definitions. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com Stable-dep-of: 2598619e012c ("sctp: add bpf_bypass_getsockopt proto callback") Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 27 +- include/linux/indirect_call_wrapper.h | 6 + include/net/sock.h | 2 + include/net/tcp.h | 1 + kernel/bpf/cgroup.c | 46 +++ net/ipv4/tcp.c | 14 + net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + net/socket.c | 3 + tools/include/uapi/linux/tcp.h | 357 ++++++++++++++++++ .../selftests/bpf/prog_tests/bpf_tcp_ca.c | 1 + .../selftests/bpf/prog_tests/cls_redirect.c | 1 + .../selftests/bpf/prog_tests/sockmap_basic.c | 1 + .../selftests/bpf/prog_tests/sockopt_sk.c | 28 ++ .../testing/selftests/bpf/progs/sockopt_sk.c | 23 +- tools/testing/selftests/bpf/test_progs.h | 1 - 16 files changed, 506 insertions(+), 7 deletions(-) create mode 100644 tools/include/uapi/linux/tcp.h diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 91b966978541..53702b83ce5f 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -158,6 +158,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user *optlen, int max_optlen, int retval); +int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, + int optname, void *optval, + int *optlen, int retval); + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -404,10 +408,23 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, ({ \ int __ret = retval; \ if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \ - optname, optval, \ - optlen, max_optlen, \ - retval); \ + if (!(sock)->sk_prot->bpf_bypass_getsockopt || \ + !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \ + tcp_bpf_bypass_getsockopt, \ + level, optname)) \ + __ret = __cgroup_bpf_run_filter_getsockopt( \ + sock, level, optname, optval, optlen, \ + max_optlen, retval); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_getsockopt_kern( \ + sock, level, optname, optval, optlen, retval); \ __ret; \ }) @@ -493,6 +510,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) +#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ + optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 54c02c84906a..cfcfef37b2f1 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -60,4 +60,10 @@ #define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__) #endif +#if IS_ENABLED(CONFIG_INET) +#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__) +#else +#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__) +#endif + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 51b499d74549..03e7f7581559 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1207,6 +1207,8 @@ struct proto { int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); + bool (*bpf_bypass_getsockopt)(int level, + int optname); void (*release_cb)(struct sock *sk); diff --git a/include/net/tcp.h b/include/net/tcp.h index d213b86a4822..e231101e5001 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -389,6 +389,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); +bool tcp_bpf_bypass_getsockopt(int level, int optname); int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index d3593a520bb7..85927c2aa343 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1546,6 +1546,52 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, sockopt_free_buf(&ctx); return ret; } + +int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, + int optname, void *optval, + int *optlen, int retval) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_sockopt_kern ctx = { + .sk = sk, + .level = level, + .optname = optname, + .retval = retval, + .optlen = *optlen, + .optval = optval, + .optval_end = optval + *optlen, + }; + int ret; + + /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy + * user data back into BPF buffer when reval != 0. This is + * done as an optimization to avoid extra copy, assuming + * kernel won't populate the data in case of an error. + * Here we always pass the data and memset() should + * be called if that data shouldn't be "exported". + */ + + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], + &ctx, BPF_PROG_RUN); + if (!ret) + return -EPERM; + + if (ctx.optlen > *optlen) + return -EFAULT; + + /* BPF programs only allowed to set retval to 0, not some + * arbitrary value. + */ + if (ctx.retval != 0 && ctx.retval != retval) + return -EFAULT; + + /* BPF programs can shrink the buffer, export the modifications. + */ + if (ctx.optlen != 0) + *optlen = ctx.optlen; + + return ctx.retval; +} #endif static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 82abbf192985..cc42ceadc112 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3970,6 +3970,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname, + &zc, &len, err); release_sock(sk); if (len >= offsetofend(struct tcp_zerocopy_receive, err)) goto zerocopy_rcv_sk_err; @@ -4004,6 +4006,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; } +bool tcp_bpf_bypass_getsockopt(int level, int optname) +{ + /* TCP do_tcp_getsockopt has optimized getsockopt implementation + * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE. + */ + if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE) + return true; + + return false; +} +EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt); + int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 270b20e0907c..d62d5d7764ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2805,6 +2805,7 @@ struct proto tcp_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fe29bc66aeac..5392aebd48f1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2135,6 +2135,7 @@ struct proto tcpv6_prot = { .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, + .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, diff --git a/net/socket.c b/net/socket.c index 84223419da86..f2172b756c0f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2137,6 +2137,9 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, return __sys_setsockopt(fd, level, optname, optval, optlen); } +INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, + int optname)); + /* * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h new file mode 100644 index 000000000000..13ceeb395eb8 --- /dev/null +++ b/tools/include/uapi/linux/tcp.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol. + * + * Version: @(#)tcp.h 1.0.2 04/28/93 + * + * Author: Fred N. van Kempen, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _UAPI_LINUX_TCP_H +#define _UAPI_LINUX_TCP_H + +#include +#include +#include + +struct tcphdr { + __be16 source; + __be16 dest; + __be32 seq; + __be32 ack_seq; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 doff:4, + res1:4, + cwr:1, + ece:1, + urg:1, + ack:1, + psh:1, + rst:1, + syn:1, + fin:1; +#else +#error "Adjust your defines" +#endif + __be16 window; + __sum16 check; + __be16 urg_ptr; +}; + +/* + * The union cast uses a gcc extension to avoid aliasing problems + * (union is compatible to any of its members) + * This means this part of the code is -fstrict-aliasing safe now. + */ +union tcp_word_hdr { + struct tcphdr hdr; + __be32 words[5]; +}; + +#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) + +enum { + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) +}; + +/* + * TCP general constants + */ +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + +/* TCP socket options */ +#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */ +#define TCP_MAXSEG 2 /* Limit MSS */ +#define TCP_CORK 3 /* Never send partially complete segments */ +#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#define TCP_CONGESTION 13 /* Congestion control algorithm */ +#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ +#define TCP_REPAIR_QUEUE 20 +#define TCP_QUEUE_SEQ 21 +#define TCP_REPAIR_OPTIONS 22 +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ +#define TCP_TIMESTAMP 24 +#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ +#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ +#define TCP_ULP 31 /* Attach a ULP to a TCP connection */ +#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */ +#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */ +#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */ +#define TCP_ZEROCOPY_RECEIVE 35 +#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */ + +#define TCP_CM_INQ TCP_INQ + +#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ + + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF 0 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + +struct tcp_repair_opt { + __u32 opt_code; + __u32 opt_val; +}; + +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + +enum { + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/* why fastopen failed from client perspective */ +enum tcp_fastopen_client_fail { + TFO_STATUS_UNSPEC, /* catch-all */ + TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */ + TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */ + TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */ +}; + +/* for TCP_INFO socket option */ +#define TCPI_OPT_TIMESTAMPS 1 +#define TCPI_OPT_SACK 2 +#define TCPI_OPT_WSCALE 4 +#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */ +#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */ +#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */ + +/* + * Sender's congestion state indicating normal or abnormal situations + * in the last round of packets sent. The state is driven by the ACK + * information and timer events. + */ +enum tcp_ca_state { + /* + * Nothing bad has been observed recently. + * No apparent reordering, packet loss, or ECN marks. + */ + TCP_CA_Open = 0, +#define TCPF_CA_Open (1< +#include #include #include "bpf_dctcp.skel.h" #include "bpf_cubic.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 9781d85cb223..e075d03ab630 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -7,6 +7,7 @@ #include #include +#include #include diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 85f73261fab0..b8b48cac2ac3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Cloudflare #include +#include #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index b25c9c45c148..d5b44b135c00 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -2,6 +2,12 @@ #include #include "cgroup_helpers.h" +#include + +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef static int getsetsockopt(void) @@ -11,6 +17,7 @@ static int getsetsockopt(void) char u8[4]; __u32 u32; char cc[16]; /* TCP_CA_NAME_MAX */ + struct tcp_zerocopy_receive zc; } buf = {}; socklen_t optlen; char *big_buf = NULL; @@ -154,6 +161,27 @@ static int getsetsockopt(void) goto err; } + /* TCP_ZEROCOPY_RECEIVE triggers */ + memset(&buf, 0, sizeof(buf)); + optlen = sizeof(buf.zc); + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (err) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + + memset(&buf, 0, sizeof(buf)); + buf.zc.address = 12345; /* rejected by BPF */ + optlen = sizeof(buf.zc); + errno = 0; + err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); + if (errno != EPERM) { + log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", + err, errno); + goto err; + } + free(big_buf); close(fd); return 0; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 712df7b49cb1..d3597f81e6e9 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -#include +#include #include +#include #include char _license[] SEC("license") = "GPL"; @@ -12,6 +12,10 @@ __u32 _version SEC("version") = 1; #define PAGE_SIZE 4096 #endif +#ifndef SOL_TCP +#define SOL_TCP IPPROTO_TCP +#endif + #define SOL_CUSTOM 0xdeadbeef struct sockopt_sk { @@ -57,6 +61,21 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { + /* Verify that TCP_ZEROCOPY_RECEIVE triggers. + * It has a custom implementation for performance + * reasons. + */ + + if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) + return 0; /* EPERM, bounds check */ + + if (((struct tcp_zerocopy_receive *)optval)->address != 0) + return 0; /* EPERM, unexpected data */ + + return 1; + } + if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 238f5f61189e..1d429d67f8dd 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -16,7 +16,6 @@ typedef __u16 __sum16; #include #include #include -#include #include #include #include From b190ced50a5e98a604e9d029080f93ea78d63400 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 11 May 2023 15:25:06 +0200 Subject: [PATCH 049/509] sctp: add bpf_bypass_getsockopt proto callback [ Upstream commit 2598619e012cee5273a2821441b9a051ad931249 ] Implement ->bpf_bypass_getsockopt proto callback and filter out SCTP_SOCKOPT_PEELOFF, SCTP_SOCKOPT_PEELOFF_FLAGS and SCTP_SOCKOPT_CONNECTX3 socket options from running eBPF hook on them. SCTP_SOCKOPT_PEELOFF and SCTP_SOCKOPT_PEELOFF_FLAGS options do fd_install(), and if BPF_CGROUP_RUN_PROG_GETSOCKOPT hook returns an error after success of the original handler sctp_getsockopt(...), userspace will receive an error from getsockopt syscall and will be not aware that fd was successfully installed into a fdtable. As pointed by Marcelo Ricardo Leitner it seems reasonable to skip bpf getsockopt hook for SCTP_SOCKOPT_CONNECTX3 sockopt too. Because internaly, it triggers connect() and if error is masked then userspace will be confused. This patch was born as a result of discussion around a new SCM_PIDFD interface: https://lore.kernel.org/all/20230413133355.350571-3-aleksandr.mikhalitsyn@canonical.com/ Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Cc: Daniel Borkmann Cc: Christian Brauner Cc: Stanislav Fomichev Cc: Neil Horman Cc: Marcelo Ricardo Leitner Cc: Xin Long Cc: linux-sctp@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Suggested-by: Stanislav Fomichev Acked-by: Stanislav Fomichev Signed-off-by: Alexander Mikhalitsyn Acked-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sctp/socket.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 35d3eee26ea5..4a7f811abae4 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -8039,6 +8039,22 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, return retval; } +static bool sctp_bpf_bypass_getsockopt(int level, int optname) +{ + if (level == SOL_SCTP) { + switch (optname) { + case SCTP_SOCKOPT_PEELOFF: + case SCTP_SOCKOPT_PEELOFF_FLAGS: + case SCTP_SOCKOPT_CONNECTX3: + return true; + default: + return false; + } + } + + return false; +} + static int sctp_hash(struct sock *sk) { /* STUB */ @@ -9407,6 +9423,7 @@ struct proto sctp_prot = { .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, + .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, @@ -9459,6 +9476,7 @@ struct proto sctpv6_prot = { .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, + .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt, .sendmsg = sctp_sendmsg, .recvmsg = sctp_recvmsg, .bind = sctp_bind, From ef7fe1b5c4fbc9a402bff70cb8aacf570afcab38 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 8 May 2023 23:55:02 -0700 Subject: [PATCH 050/509] libbpf: fix offsetof() and container_of() to work with CO-RE [ Upstream commit bdeeed3498c7871c17465bb4f11d1bc67f9098af ] It seems like __builtin_offset() doesn't preserve CO-RE field relocations properly. So if offsetof() macro is defined through __builtin_offset(), CO-RE-enabled BPF code using container_of() will be subtly and silently broken. To avoid this problem, redefine offsetof() and container_of() in the form that works with CO-RE relocations more reliably. Fixes: 5fbc220862fc ("tools/libpf: Add offsetof/container_of macro in bpf_helpers.h") Reported-by: Lennart Poettering Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230509065502.2306180-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- tools/lib/bpf/bpf_helpers.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 72b251110c4d..1c389b0f5499 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -42,16 +42,21 @@ /* * Helper macro to manipulate data structures */ -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) -#endif -#ifndef container_of + +/* offsetof() definition that uses __builtin_offset() might not preserve field + * offset CO-RE relocation properly, so force-redefine offsetof() using + * old-school approach which works with CO-RE correctly + */ +#undef offsetof +#define offsetof(type, member) ((unsigned long)&((type *)0)->member) + +/* redefined container_of() to ensure we use the above offsetof() macro */ +#undef container_of #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *)(ptr); \ ((type *)(__mptr - offsetof(type, member))); \ }) -#endif /* * Helper macro to throw a compilation error if __bpf_unreachable() gets From 0be9de2ea01e8d52646e7310a7eef5459cf07ea8 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 30 Jul 2021 16:41:59 +0200 Subject: [PATCH 051/509] nfc: constify several pointers to u8, char and sk_buff [ Upstream commit 3df40eb3a2ea58bf404a38f15a7a2768e4762cb0 ] Several functions receive pointers to u8, char or sk_buff but do not modify the contents so make them const. This allows doing the same for local variables and in total makes the code a little bit safer. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Jakub Kicinski Stable-dep-of: 0d9b41daa590 ("nfc: llcp: fix possible use of uninitialized variable in nfc_llcp_send_connect()") Signed-off-by: Sasha Levin --- include/net/nfc/nfc.h | 4 ++-- net/nfc/core.c | 4 ++-- net/nfc/hci/llc_shdlc.c | 10 ++++----- net/nfc/llcp.h | 8 +++---- net/nfc/llcp_commands.c | 46 ++++++++++++++++++++++------------------- net/nfc/llcp_core.c | 44 +++++++++++++++++++++------------------ net/nfc/nfc.h | 2 +- 7 files changed, 63 insertions(+), 55 deletions(-) diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 2cd3a261bcbc..32890e43f06c 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -266,7 +266,7 @@ struct sk_buff *nfc_alloc_send_skb(struct nfc_dev *dev, struct sock *sk, struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp); int nfc_set_remote_general_bytes(struct nfc_dev *dev, - u8 *gt, u8 gt_len); + const u8 *gt, u8 gt_len); u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, size_t *gb_len); int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name, @@ -280,7 +280,7 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len); + const u8 *gb, size_t gb_len); int nfc_tm_deactivated(struct nfc_dev *dev); int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); diff --git a/net/nfc/core.c b/net/nfc/core.c index 2ef56366bd5f..10a3d740d155 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -634,7 +634,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) return rc; } -int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -663,7 +663,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb) EXPORT_SYMBOL(nfc_tm_data_received); int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode, - u8 *gb, size_t gb_len) + const u8 *gb, size_t gb_len) { int rc; diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 0eb4ddc056e7..02909e3e91ef 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z) return ((y >= x) || (y < z)) ? true : false; } -static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, +static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc, int payload_len) { struct sk_buff *skb; @@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc, } /* immediately sends an S frame. */ -static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc, enum sframe_type sframe_type, int nr) { int r; @@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc, } /* immediately sends an U frame. skb may contain optional payload */ -static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc, +static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc, struct sk_buff *skb, enum uframe_modifier uframe_modifier) { @@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r) wake_up(shdlc->connect_wq); } -static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc) { struct sk_buff *skb; @@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc) return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET); } -static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc) +static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc) { struct sk_buff *skb; diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index 97853c9cefc7..d49d4bf2e37c 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock); /* TLV API */ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len); + const u8 *tlv_array, u16 tlv_array_len); /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length); struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 475061c79c44..3c4172a5aeb5 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -15,7 +15,7 @@ #include "nfc.h" #include "llcp.h" -static u8 llcp_tlv_length[LLCP_TLV_MAX] = { +static const u8 llcp_tlv_length[LLCP_TLV_MAX] = { 0, 1, /* VERSION */ 2, /* MIUX */ @@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = { }; -static u8 llcp_tlv8(u8 *tlv, u8 type) +static u8 llcp_tlv8(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type) return tlv[2]; } -static u16 llcp_tlv16(u8 *tlv, u8 type) +static u16 llcp_tlv16(const u8 *tlv, u8 type) { if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]]) return 0; @@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type) } -static u8 llcp_tlv_version(u8 *tlv) +static u8 llcp_tlv_version(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_VERSION); } -static u16 llcp_tlv_miux(u8 *tlv) +static u16 llcp_tlv_miux(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff; } -static u16 llcp_tlv_wks(u8 *tlv) +static u16 llcp_tlv_wks(const u8 *tlv) { return llcp_tlv16(tlv, LLCP_TLV_WKS); } -static u16 llcp_tlv_lto(u8 *tlv) +static u16 llcp_tlv_lto(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_LTO); } -static u8 llcp_tlv_opt(u8 *tlv) +static u8 llcp_tlv_opt(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_OPT); } -static u8 llcp_tlv_rw(u8 *tlv) +static u8 llcp_tlv_rw(const u8 *tlv) { return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf; } -u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) +u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length) { u8 *tlv, length; @@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) return sdres; } -struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri, size_t uri_len) { struct nfc_llcp_sdp_tlv *sdreq; @@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) } int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, } int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, - u8 *tlv_array, u16 tlv_array_len) + const u8 *tlv_array, u16 tlv_array_len) { - u8 *tlv = tlv_array, type, length, offset = 0; + const u8 *tlv = tlv_array; + u8 type, length, offset = 0; pr_debug("TLV array length %d\n", tlv_array_len); @@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu, return pdu; } -static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, +static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv, u8 tlv_length) { /* XXX Add an skb length check */ @@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *service_name_tlv = NULL, service_name_tlv_length; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *service_name_tlv = NULL; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; @@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) { struct nfc_llcp_local *local; struct sk_buff *skb; - u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; + const u8 *miux_tlv = NULL; + const u8 *rw_tlv = NULL; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index edadebb3efd2..fd43e75abd94 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -302,7 +302,7 @@ static char *wks[] = { "urn:nfc:sn:snep", }; -static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) +static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len) { int sap, num_wks; @@ -326,7 +326,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len) static struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct sock *sk; struct nfc_llcp_sock *llcp_sock, *tmp_sock; @@ -523,7 +523,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, version, version_length; u8 lto_length, wks_length, miux_length; - u8 *version_tlv = NULL, *lto_tlv = NULL, + const u8 *version_tlv = NULL, *lto_tlv = NULL, *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; @@ -613,7 +613,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) return local->gb; } -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; @@ -640,27 +640,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len) local->remote_gb_len - 3); } -static u8 nfc_llcp_dsap(struct sk_buff *pdu) +static u8 nfc_llcp_dsap(const struct sk_buff *pdu) { return (pdu->data[0] & 0xfc) >> 2; } -static u8 nfc_llcp_ptype(struct sk_buff *pdu) +static u8 nfc_llcp_ptype(const struct sk_buff *pdu) { return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6); } -static u8 nfc_llcp_ssap(struct sk_buff *pdu) +static u8 nfc_llcp_ssap(const struct sk_buff *pdu) { return pdu->data[1] & 0x3f; } -static u8 nfc_llcp_ns(struct sk_buff *pdu) +static u8 nfc_llcp_ns(const struct sk_buff *pdu) { return pdu->data[2] >> 4; } -static u8 nfc_llcp_nr(struct sk_buff *pdu) +static u8 nfc_llcp_nr(const struct sk_buff *pdu) { return pdu->data[2] & 0xf; } @@ -802,7 +802,7 @@ static struct nfc_llcp_sock *nfc_llcp_connecting_sock_get(struct nfc_llcp_local } static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, - u8 *sn, size_t sn_len) + const u8 *sn, size_t sn_len) { struct nfc_llcp_sock *llcp_sock; @@ -816,9 +816,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local, return llcp_sock; } -static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len) +static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len) { - u8 *tlv = &skb->data[2], type, length; + u8 type, length; + const u8 *tlv = &skb->data[2]; size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0; while (offset < tlv_array_len) { @@ -876,7 +877,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, } static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct sock *new_sk, *parent; struct nfc_llcp_sock *sock, *new_sock; @@ -894,7 +895,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, goto fail; } } else { - u8 *sn; + const u8 *sn; size_t sn_len; sn = nfc_llcp_connect_sn(skb, &sn_len); @@ -1113,7 +1114,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, } static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1156,7 +1157,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1189,7 +1191,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb) nfc_llcp_sock_put(llcp_sock); } -static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) +static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; struct sock *sk; @@ -1227,12 +1230,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb) } static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, - struct sk_buff *skb) + const struct sk_buff *skb) { struct nfc_llcp_sock *llcp_sock; - u8 dsap, ssap, *tlv, type, length, tid, sap; + u8 dsap, ssap, type, length, tid, sap; + const u8 *tlv; u16 tlv_len, offset; - char *service_name; + const char *service_name; size_t service_name_len; struct nfc_llcp_sdp_tlv *sdp; HLIST_HEAD(llc_sdres_list); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 889fefd64e56..de2ec66d7e83 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, u8 comm_mode, u8 rf_mode); int nfc_llcp_register_device(struct nfc_dev *dev); void nfc_llcp_unregister_device(struct nfc_dev *dev); -int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); +int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); From 107e849f3c6a4de0ca597848331e240cc5c36c60 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 13 May 2023 13:52:04 +0200 Subject: [PATCH 052/509] nfc: llcp: fix possible use of uninitialized variable in nfc_llcp_send_connect() [ Upstream commit 0d9b41daa5907756a31772d8af8ac5ff25cf17c1 ] If sock->service_name is NULL, the local variable service_name_tlv_length will not be assigned by nfc_llcp_build_tlv(), later leading to using value frmo the stack. Smatch warning: net/nfc/llcp_commands.c:442 nfc_llcp_send_connect() error: uninitialized symbol 'service_name_tlv_length'. Fixes: de9e5aeb4f40 ("NFC: llcp: Fix usage of llcp_add_tlv()") Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/nfc/llcp_commands.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 3c4172a5aeb5..bb9f40563ff6 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -394,7 +394,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) const u8 *service_name_tlv = NULL; const u8 *miux_tlv = NULL; const u8 *rw_tlv = NULL; - u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw; + u8 service_name_tlv_length = 0; + u8 miux_tlv_length, rw_tlv_length, rw; int err; u16 size = 0; __be16 miux; From 78f390aa0eb50501627429c0da7ed8e707230ead Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Fri, 12 May 2023 12:31:34 +0100 Subject: [PATCH 053/509] bpftool: JIT limited misreported as negative value on aarch64 [ Upstream commit 04cb8453a91c7c22f60ddadb6cef0d19abb33bb5 ] On aarch64, "bpftool feature" reports an incorrect BPF JIT limit: $ sudo /sbin/bpftool feature Scanning system configuration... bpf() syscall restricted to privileged users JIT compiler is enabled JIT compiler hardening is disabled JIT compiler kallsyms exports are enabled for root skipping kernel config, can't open file: No such file or directory Global memory limit for JIT compiler for unprivileged users is -201326592 bytes This is because /proc/sys/net/core/bpf_jit_limit reports $ sudo cat /proc/sys/net/core/bpf_jit_limit 68169519595520 ...and an int is assumed in read_procfs(). Change read_procfs() to return a long to avoid negative value reporting. Fixes: 7a4522bbef0c ("tools: bpftool: add probes for /proc/ eBPF parameters") Reported-by: Nicky Veitch Signed-off-by: Alan Maguire Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20230512113134.58996-1-alan.maguire@oracle.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/feature.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 359960a8f1de..5f0b1397798e 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -135,12 +135,12 @@ static void print_end_section(void) /* Probing functions */ -static int read_procfs(const char *path) +static long read_procfs(const char *path) { char *endptr, *line = NULL; size_t len = 0; FILE *fd; - int res; + long res; fd = fopen(path, "r"); if (!fd) @@ -162,7 +162,7 @@ static int read_procfs(const char *path) static void probe_unprivileged_disabled(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -181,14 +181,14 @@ static void probe_unprivileged_disabled(void) printf("Unable to retrieve required privileges for bpf() syscall\n"); break; default: - printf("bpf() syscall restriction has unknown value %d\n", res); + printf("bpf() syscall restriction has unknown value %ld\n", res); } } } static void probe_jit_enable(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -210,7 +210,7 @@ static void probe_jit_enable(void) printf("Unable to retrieve JIT-compiler status\n"); break; default: - printf("JIT-compiler status has unknown value %d\n", + printf("JIT-compiler status has unknown value %ld\n", res); } } @@ -218,7 +218,7 @@ static void probe_jit_enable(void) static void probe_jit_harden(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -240,7 +240,7 @@ static void probe_jit_harden(void) printf("Unable to retrieve JIT hardening status\n"); break; default: - printf("JIT hardening status has unknown value %d\n", + printf("JIT hardening status has unknown value %ld\n", res); } } @@ -248,7 +248,7 @@ static void probe_jit_harden(void) static void probe_jit_kallsyms(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -267,14 +267,14 @@ static void probe_jit_kallsyms(void) printf("Unable to retrieve JIT kallsyms export status\n"); break; default: - printf("JIT kallsyms exports status has unknown value %d\n", res); + printf("JIT kallsyms exports status has unknown value %ld\n", res); } } } static void probe_jit_limit(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -287,7 +287,7 @@ static void probe_jit_limit(void) printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n"); break; default: - printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res); + printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res); } } } From 92bcd84941260f42a683fc7e3d1ebd8656897fb3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 May 2023 13:13:58 +0200 Subject: [PATCH 054/509] regulator: core: Fix more error checking for debugfs_create_dir() [ Upstream commit 2715bb11cfff964aa33946847f9527cfbd4874f5 ] In case of failure, debugfs_create_dir() does not return NULL, but an error pointer. Most incorrect error checks were fixed, but the one in create_regulator() was forgotten. Fix the remaining error check. Fixes: 2bf1c45be3b8f3a3 ("regulator: Fix error checking for debugfs_create_dir") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/ee980a108b5854dd8ce3630f8f673e784e057d17.1685013051.git.geert+renesas@glider.be Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index f5ab74683b58..e4ff64d28c77 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1751,7 +1751,7 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, if (err != -EEXIST) regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); - if (!regulator->debugfs) { + if (IS_ERR(regulator->debugfs)) { rdev_dbg(rdev, "Failed to create debugfs directory\n"); } else { debugfs_create_u32("uA_load", 0444, regulator->debugfs, From 8782dc2504da44ca27d6d0dc9d854ef44e848c68 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 25 May 2023 13:13:59 +0200 Subject: [PATCH 055/509] regulator: core: Streamline debugfs operations [ Upstream commit 08880713ceec023dd94d634f1e8902728c385939 ] If CONFIG_DEBUG_FS is not set: regulator: Failed to create debugfs directory ... regulator-dummy: Failed to create debugfs directory As per the comments for debugfs_create_dir(), errors returned by this function should be expected, and ignored: * If debugfs is not enabled in the kernel, the value -%ENODEV will be * returned. * * NOTE: it's expected that most callers should _ignore_ the errors returned * by this function. Other debugfs functions handle the fact that the "dentry" * passed to them could be an error and they don't crash in that case. * Drivers should generally work fine even if debugfs fails to init anyway. Adhere to the debugfs spirit, and streamline all operations by: 1. Demoting the importance of the printed error messages to debug level, like is already done in create_regulator(), 2. Further ignoring any returned errors, as by design, all debugfs functions are no-ops when passed an error pointer. Fixes: 2bf1c45be3b8f3a3 ("regulator: Fix error checking for debugfs_create_dir") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/2f8bb6e113359ddfab7b59e4d4274bd4c06d6d0a.1685013051.git.geert+renesas@glider.be Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/core.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index e4ff64d28c77..52b75779dbb7 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1751,19 +1751,17 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, if (err != -EEXIST) regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); - if (IS_ERR(regulator->debugfs)) { + if (IS_ERR(regulator->debugfs)) rdev_dbg(rdev, "Failed to create debugfs directory\n"); - } else { - debugfs_create_u32("uA_load", 0444, regulator->debugfs, - ®ulator->uA_load); - debugfs_create_u32("min_uV", 0444, regulator->debugfs, - ®ulator->voltage[PM_SUSPEND_ON].min_uV); - debugfs_create_u32("max_uV", 0444, regulator->debugfs, - ®ulator->voltage[PM_SUSPEND_ON].max_uV); - debugfs_create_file("constraint_flags", 0444, - regulator->debugfs, regulator, - &constraint_flags_fops); - } + + debugfs_create_u32("uA_load", 0444, regulator->debugfs, + ®ulator->uA_load); + debugfs_create_u32("min_uV", 0444, regulator->debugfs, + ®ulator->voltage[PM_SUSPEND_ON].min_uV); + debugfs_create_u32("max_uV", 0444, regulator->debugfs, + ®ulator->voltage[PM_SUSPEND_ON].max_uV); + debugfs_create_file("constraint_flags", 0444, regulator->debugfs, + regulator, &constraint_flags_fops); /* * Check now if the regulator is an always on regulator - if @@ -5032,10 +5030,8 @@ static void rdev_init_debugfs(struct regulator_dev *rdev) } rdev->debugfs = debugfs_create_dir(rname, debugfs_root); - if (IS_ERR(rdev->debugfs)) { - rdev_warn(rdev, "Failed to create debugfs directory\n"); - return; - } + if (IS_ERR(rdev->debugfs)) + rdev_dbg(rdev, "Failed to create debugfs directory\n"); debugfs_create_u32("use_count", 0444, rdev->debugfs, &rdev->use_count); @@ -5938,7 +5934,7 @@ static int __init regulator_init(void) debugfs_root = debugfs_create_dir("regulator", NULL); if (IS_ERR(debugfs_root)) - pr_warn("regulator: Failed to create debugfs directory\n"); + pr_debug("regulator: Failed to create debugfs directory\n"); #ifdef CONFIG_DEBUG_FS debugfs_create_file("supply_map", 0444, debugfs_root, NULL, From fb7d78feb55a3cd341ddc2d4981362cd24f854b0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 09:29:46 +0200 Subject: [PATCH 056/509] wifi: orinoco: Fix an error handling path in spectrum_cs_probe() [ Upstream commit 925244325159824385209e3e0e3f91fa6bf0646c ] Should spectrum_cs_config() fail, some resources need to be released as already done in the remove function. While at it, remove a useless and erroneous comment. The probe is spectrum_cs_probe(), not spectrum_cs_attach(). Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/c0bc0c21c58ca477fc5521607615bafbf2aef8eb.1684567733.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/orinoco/spectrum_cs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c index 291ef97ed45e..841d623c621a 100644 --- a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c @@ -157,6 +157,7 @@ spectrum_cs_probe(struct pcmcia_device *link) { struct orinoco_private *priv; struct orinoco_pccard *card; + int ret; priv = alloc_orinocodev(sizeof(*card), &link->dev, spectrum_cs_hard_reset, @@ -169,8 +170,16 @@ spectrum_cs_probe(struct pcmcia_device *link) card->p_dev = link; link->priv = priv; - return spectrum_cs_config(link); -} /* spectrum_cs_attach */ + ret = spectrum_cs_config(link); + if (ret) + goto err_free_orinocodev; + + return 0; + +err_free_orinocodev: + free_orinocodev(priv); + return ret; +} static void spectrum_cs_detach(struct pcmcia_device *link) { From 86ebbcbdc7b1838d859ddfa8982275cb45912f9a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 09:38:22 +0200 Subject: [PATCH 057/509] wifi: orinoco: Fix an error handling path in orinoco_cs_probe() [ Upstream commit 67a81d911c01225f426cc6bee2373df044c1a9b7 ] Should orinoco_cs_config() fail, some resources need to be released as already done in the remove function. While at it, remove a useless and erroneous comment. The probe is orinoco_cs_probe(), not orinoco_cs_attach(). Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/e24735ce4d82901d5f7ea08419eea53bfdde3d65.1684568286.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/intersil/orinoco/orinoco_cs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_cs.c b/drivers/net/wireless/intersil/orinoco/orinoco_cs.c index a956f965a1e5..03bfd2482656 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_cs.c @@ -96,6 +96,7 @@ orinoco_cs_probe(struct pcmcia_device *link) { struct orinoco_private *priv; struct orinoco_pccard *card; + int ret; priv = alloc_orinocodev(sizeof(*card), &link->dev, orinoco_cs_hard_reset, NULL); @@ -107,8 +108,16 @@ orinoco_cs_probe(struct pcmcia_device *link) card->p_dev = link; link->priv = priv; - return orinoco_cs_config(link); -} /* orinoco_cs_attach */ + ret = orinoco_cs_config(link); + if (ret) + goto err_free_orinocodev; + + return 0; + +err_free_orinocodev: + free_orinocodev(priv); + return ret; +} static void orinoco_cs_detach(struct pcmcia_device *link) { From 5512db9bd40473238c841d0dc72cc5b19ea77657 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 09:53:14 +0200 Subject: [PATCH 058/509] wifi: atmel: Fix an error handling path in atmel_probe() [ Upstream commit 6b92e4351a29af52c285fe235e6e4d1a75de04b2 ] Should atmel_config() fail, some resources need to be released as already done in the remove function. While at it, remove a useless and erroneous comment. The probe is atmel_probe(), not atmel_attach(). Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1e65f174607a83348034197fa7d603bab10ba4a9.1684569156.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/atmel/atmel_cs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/atmel/atmel_cs.c b/drivers/net/wireless/atmel/atmel_cs.c index 368eebefa741..e64f108d288b 100644 --- a/drivers/net/wireless/atmel/atmel_cs.c +++ b/drivers/net/wireless/atmel/atmel_cs.c @@ -73,6 +73,7 @@ struct local_info { static int atmel_probe(struct pcmcia_device *p_dev) { struct local_info *local; + int ret; dev_dbg(&p_dev->dev, "atmel_attach()\n"); @@ -83,8 +84,16 @@ static int atmel_probe(struct pcmcia_device *p_dev) p_dev->priv = local; - return atmel_config(p_dev); -} /* atmel_attach */ + ret = atmel_config(p_dev); + if (ret) + goto err_free_priv; + + return 0; + +err_free_priv: + kfree(p_dev->priv); + return ret; +} static void atmel_detach(struct pcmcia_device *link) { From 13cf0e3894d17489dd0a30b31555dfcaee39d958 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 2 Nov 2020 11:23:53 +0000 Subject: [PATCH 059/509] wl3501_cs: Fix misspelling and provide missing documentation [ Upstream commit 8b8a6f8c3b50193d161c598a6784e721128d6dc3 ] Fixes the following W=1 kernel build warning(s): In file included from drivers/net/wireless/wl3501_cs.c:57: drivers/net/wireless/wl3501_cs.c:143: warning: Function parameter or member 'reg_domain' not described in 'iw_valid_channel' drivers/net/wireless/wl3501_cs.c:143: warning: Excess function parameter 'reg_comain' description in 'iw_valid_channel' drivers/net/wireless/wl3501_cs.c:469: warning: Function parameter or member 'data' not described in 'wl3501_send_pkt' drivers/net/wireless/wl3501_cs.c:469: warning: Function parameter or member 'len' not described in 'wl3501_send_pkt' Cc: Kalle Valo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Fox Chen Cc: de Melo Cc: Gustavo Niemeyer Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Lee Jones Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201102112410.1049272-25-lee.jones@linaro.org Stable-dep-of: 391af06a02e7 ("wifi: wl3501_cs: Fix an error handling path in wl3501_probe()") Signed-off-by: Sasha Levin --- drivers/net/wireless/wl3501_cs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index ccf6344ed6fd..cb71b73853f4 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -134,7 +134,7 @@ static const struct { /** * iw_valid_channel - validate channel in regulatory domain - * @reg_comain: regulatory domain + * @reg_domain: regulatory domain * @channel: channel to validate * * Returns 0 if invalid in the specified regulatory domain, non-zero if valid. @@ -458,11 +458,9 @@ static int wl3501_pwr_mgmt(struct wl3501_card *this, int suspend) /** * wl3501_send_pkt - Send a packet. * @this: Card - * - * Send a packet. - * - * data = Ethernet raw frame. (e.g. data[0] - data[5] is Dest MAC Addr, + * @data: Ethernet raw frame. (e.g. data[0] - data[5] is Dest MAC Addr, * data[6] - data[11] is Src MAC Addr) + * @len: Packet length * Ref: IEEE 802.11 */ static int wl3501_send_pkt(struct wl3501_card *this, u8 *data, u16 len) From cbd44a9e1cf1952d0641302934afa5c90ead8d00 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Sep 2021 11:10:37 -0700 Subject: [PATCH 060/509] net: create netdev->dev_addr assignment helpers [ Upstream commit 48eab831ae8b9f7002a533fa4235eed63ea1f1a3 ] Recent work on converting address list to a tree made it obvious we need an abstraction around writing netdev->dev_addr. Without such abstraction updating the main device address is invisible to the core. Introduce a number of helpers which for now just wrap memcpy() but in the future can make necessary changes to the address tree. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Stable-dep-of: 391af06a02e7 ("wifi: wl3501_cs: Fix an error handling path in wl3501_probe()") Signed-off-by: Sasha Levin --- include/linux/etherdevice.h | 12 ++++++++++++ include/linux/netdevice.h | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 99209f50915f..b060514bf25d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -299,6 +299,18 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src) #endif } +/** + * eth_hw_addr_set - Assign Ethernet address to a net_device + * @dev: pointer to net_device structure + * @addr: address to assign + * + * Assign given address to the net_device, addr_assign_type is not changed. + */ +static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr) +{ + ether_addr_copy(dev->dev_addr, addr); +} + /** * eth_hw_addr_inherit - Copy dev_addr from another net_device * @dst: pointer to net_device to copy dev_addr to diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8f03cc42bd43..302abfc2a1f6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4474,6 +4474,24 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ +static inline void +__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len) +{ + memcpy(dev->dev_addr, addr, len); +} + +static inline void dev_addr_set(struct net_device *dev, const u8 *addr) +{ + __dev_addr_set(dev, addr, dev->addr_len); +} + +static inline void +dev_addr_mod(struct net_device *dev, unsigned int offset, + const u8 *addr, size_t len) +{ + memcpy(&dev->dev_addr[offset], addr, len); +} + int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, From b6f793de619b722ceb001b0c2d55e6aac4b81795 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 18 Oct 2021 16:50:20 -0700 Subject: [PATCH 061/509] wl3501_cs: use eth_hw_addr_set() [ Upstream commit 18774612246d036c04ce9fee7f67192f96f48725 ] Commit 406f42fa0d3c ("net-next: When a bond have a massive amount of VLANs...") introduced a rbtree for faster Ethernet address look up. To maintain netdev->dev_addr in this tree we need to make all the writes to it got through appropriate helpers. Signed-off-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20211018235021.1279697-15-kuba@kernel.org Stable-dep-of: 391af06a02e7 ("wifi: wl3501_cs: Fix an error handling path in wl3501_probe()") Signed-off-by: Sasha Levin --- drivers/net/wireless/wl3501_cs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index cb71b73853f4..7351a2c127ad 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1945,8 +1945,7 @@ static int wl3501_config(struct pcmcia_device *link) goto failed; } - for (i = 0; i < 6; i++) - dev->dev_addr[i] = ((char *)&this->mac_addr)[i]; + eth_hw_addr_set(dev, this->mac_addr); /* print probe information */ printk(KERN_INFO "%s: wl3501 @ 0x%3.3x, IRQ %d, " From 18d71562f70de4ec18455b7ca44a8825bc17494b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 10:05:08 +0200 Subject: [PATCH 062/509] wifi: wl3501_cs: Fix an error handling path in wl3501_probe() [ Upstream commit 391af06a02e7642039ac5f6c4b2c034ab0992b5d ] Should wl3501_config() fail, some resources need to be released as already done in the remove function. Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/7cc9c9316489b7d69b36aeb0edd3123538500b41.1684569865.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/wl3501_cs.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 7351a2c127ad..4c408fd7c159 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1862,6 +1862,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) { struct net_device *dev; struct wl3501_card *this; + int ret; /* The io structure describes IO port mapping */ p_dev->resource[0]->end = 16; @@ -1873,8 +1874,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) dev = alloc_etherdev(sizeof(struct wl3501_card)); if (!dev) - goto out_link; - + return -ENOMEM; dev->netdev_ops = &wl3501_netdev_ops; dev->watchdog_timeo = 5 * HZ; @@ -1887,9 +1887,15 @@ static int wl3501_probe(struct pcmcia_device *p_dev) netif_stop_queue(dev); p_dev->priv = dev; - return wl3501_config(p_dev); -out_link: - return -ENOMEM; + ret = wl3501_config(p_dev); + if (ret) + goto out_free_etherdev; + + return 0; + +out_free_etherdev: + free_netdev(dev); + return ret; } static int wl3501_config(struct pcmcia_device *link) From a66e3fd3801a88d4154051cc86b4423119e4cbfe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 Jun 2022 19:44:13 +0300 Subject: [PATCH 063/509] wifi: ray_cs: Utilize strnlen() in parse_addr() [ Upstream commit 9e8e9187673cb24324f9165dd47b2b28f60b0b10 ] Instead of doing simple operations and using an additional variable on stack, utilize strnlen() and reuse len variable. Signed-off-by: Andy Shevchenko Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220603164414.48436-1-andriy.shevchenko@linux.intel.com Stable-dep-of: 4f8d66a9fb2e ("wifi: ray_cs: Fix an error handling path in ray_probe()") Signed-off-by: Sasha Levin --- drivers/net/wireless/ray_cs.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 091eea0d958d..5dcd86f81cbf 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -1641,31 +1641,29 @@ static void authenticate_timeout(struct timer_list *t) /*===========================================================================*/ static int parse_addr(char *in_str, UCHAR *out) { + int i, k; int len; - int i, j, k; int status; if (in_str == NULL) return 0; - if ((len = strlen(in_str)) < 2) + len = strnlen(in_str, ADDRLEN * 2 + 1) - 1; + if (len < 1) return 0; memset(out, 0, ADDRLEN); status = 1; - j = len - 1; - if (j > 12) - j = 12; i = 5; - while (j > 0) { - if ((k = hex_to_bin(in_str[j--])) != -1) + while (len > 0) { + if ((k = hex_to_bin(in_str[len--])) != -1) out[i] = k; else return 0; - if (j == 0) + if (len == 0) break; - if ((k = hex_to_bin(in_str[j--])) != -1) + if ((k = hex_to_bin(in_str[len--])) != -1) out[i] += k << 4; else return 0; From 8825991838fc4b9cb9ca24f658fa9d8c94f397c3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 Jun 2022 19:44:14 +0300 Subject: [PATCH 064/509] wifi: ray_cs: Drop useless status variable in parse_addr() [ Upstream commit 4dfc63c002a555a2c3c34d89009532ad803be876 ] The status variable assigned only once and used also only once. Replace it's usage by actual value. Signed-off-by: Andy Shevchenko Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20220603164414.48436-2-andriy.shevchenko@linux.intel.com Stable-dep-of: 4f8d66a9fb2e ("wifi: ray_cs: Fix an error handling path in ray_probe()") Signed-off-by: Sasha Levin --- drivers/net/wireless/ray_cs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 5dcd86f81cbf..95d5ce1b6dfa 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -1643,7 +1643,6 @@ static int parse_addr(char *in_str, UCHAR *out) { int i, k; int len; - int status; if (in_str == NULL) return 0; @@ -1652,7 +1651,6 @@ static int parse_addr(char *in_str, UCHAR *out) return 0; memset(out, 0, ADDRLEN); - status = 1; i = 5; while (len > 0) { @@ -1670,7 +1668,7 @@ static int parse_addr(char *in_str, UCHAR *out) if (!i--) break; } - return status; + return 1; } /*===========================================================================*/ From c10c6ea9b3a27ceb42ed343b41f06c906a7d3d13 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 10:13:22 +0200 Subject: [PATCH 065/509] wifi: ray_cs: Fix an error handling path in ray_probe() [ Upstream commit 4f8d66a9fb2edcd05c1e563456a55a08910bfb37 ] Should ray_config() fail, some resources need to be released as already done in the remove function. While at it, remove a useless and erroneous comment. The probe is ray_probe(), not ray_attach(). Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/8c544d18084f8b37dd108e844f7e79e85ff708ff.1684570373.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/ray_cs.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 95d5ce1b6dfa..bf1282702761 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -270,13 +270,14 @@ static int ray_probe(struct pcmcia_device *p_dev) { ray_dev_t *local; struct net_device *dev; + int ret; dev_dbg(&p_dev->dev, "ray_attach()\n"); /* Allocate space for private device-specific data */ dev = alloc_etherdev(sizeof(ray_dev_t)); if (!dev) - goto fail_alloc_dev; + return -ENOMEM; local = netdev_priv(dev); local->finder = p_dev; @@ -313,11 +314,16 @@ static int ray_probe(struct pcmcia_device *p_dev) timer_setup(&local->timer, NULL, 0); this_device = p_dev; - return ray_config(p_dev); + ret = ray_config(p_dev); + if (ret) + goto err_free_dev; -fail_alloc_dev: - return -ENOMEM; -} /* ray_attach */ + return 0; + +err_free_dev: + free_netdev(dev); + return ret; +} static void ray_detach(struct pcmcia_device *link) { From 1044187e7249073f719ebbf9e5ffb4f16f99e555 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 17 May 2023 18:03:17 +0300 Subject: [PATCH 066/509] wifi: ath9k: don't allow to overwrite ENDPOINT0 attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 061b0cb9327b80d7a0f63a33e7c3e2a91a71f142 ] A bad USB device is able to construct a service connection response message with target endpoint being ENDPOINT0 which is reserved for HTC_CTRL_RSVD_SVC and should not be modified to be used for any other services. Reject such service connection responses. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Fixes: fb9987d0f748 ("ath9k_htc: Support for AR9271 chipset.") Reported-by: syzbot+b68fbebe56d8362907e8@syzkaller.appspotmail.com Signed-off-by: Fedor Pchelkin Acked-by: Toke HĆøiland-JĆørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230516150427.79469-1-pchelkin@ispras.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc_hst.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c index fe62ff668f75..99667aba289d 100644 --- a/drivers/net/wireless/ath/ath9k/htc_hst.c +++ b/drivers/net/wireless/ath/ath9k/htc_hst.c @@ -114,7 +114,13 @@ static void htc_process_conn_rsp(struct htc_target *target, if (svc_rspmsg->status == HTC_SERVICE_SUCCESS) { epid = svc_rspmsg->endpoint_id; - if (epid < 0 || epid >= ENDPOINT_MAX) + + /* Check that the received epid for the endpoint to attach + * a new service is valid. ENDPOINT0 can't be used here as it + * is already reserved for HTC_CTRL_RSVD_SVC service and thus + * should not be modified. + */ + if (epid <= ENDPOINT0 || epid >= ENDPOINT_MAX) return; service_id = be16_to_cpu(svc_rspmsg->service_id); From b2aeb97fd470206e67f7b3b4a3e68212a13f747b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 28 May 2023 00:28:33 +0200 Subject: [PATCH 067/509] wifi: rsi: Do not configure WoWlan in shutdown hook if not enabled [ Upstream commit b241e260820b68c09586e8a0ae0fc23c0e3215bd ] In case WoWlan was never configured during the operation of the system, the hw->wiphy->wowlan_config will be NULL. rsi_config_wowlan() checks whether wowlan_config is non-NULL and if it is not, then WARNs about it. The warning is valid, as during normal operation the rsi_config_wowlan() should only ever be called with non-NULL wowlan_config. In shutdown this rsi_config_wowlan() should only ever be called if WoWlan was configured before by the user. Add checks for non-NULL wowlan_config into the shutdown hook. While at it, check whether the wiphy is also non-NULL before accessing wowlan_config . Drop the single-use wowlan_config variable, just inline it into function call. Fixes: 16bbc3eb8372 ("rsi: fix null pointer dereference during rsi_shutdown()") Signed-off-by: Marek Vasut Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230527222833.273741-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 8108f941ccd3..2c26376faeac 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1463,10 +1463,8 @@ static void rsi_shutdown(struct device *dev) rsi_dbg(ERR_ZONE, "SDIO Bus shutdown =====>\n"); - if (hw) { - struct cfg80211_wowlan *wowlan = hw->wiphy->wowlan_config; - - if (rsi_config_wowlan(adapter, wowlan)) + if (hw && hw->wiphy && hw->wiphy->wowlan_config) { + if (rsi_config_wowlan(adapter, hw->wiphy->wowlan_config)) rsi_dbg(ERR_ZONE, "Failed to configure WoWLAN\n"); } From 22da8363e35fa143a704e2a7803d26ce605a7012 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 28 May 2023 00:28:59 +0200 Subject: [PATCH 068/509] wifi: rsi: Do not set MMC_PM_KEEP_POWER in shutdown [ Upstream commit e74f562328b03fbe9cf438f958464dff3a644dfc ] It makes no sense to set MMC_PM_KEEP_POWER in shutdown. The flag indicates to the MMC subsystem to keep the slot powered on during suspend, but in shutdown the slot should actually be powered off. Drop this call. Fixes: 063848c3e155 ("rsi: sdio: Add WOWLAN support for S5 shutdown state") Signed-off-by: Marek Vasut Reviewed-by: Simon Horman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230527222859.273768-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 2c26376faeac..b1d3aea10d7d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1479,9 +1479,6 @@ static void rsi_shutdown(struct device *dev) if (sdev->write_fail) rsi_dbg(INFO_ZONE, "###### Device is not ready #######\n"); - if (rsi_set_sdio_pm_caps(adapter)) - rsi_dbg(INFO_ZONE, "Setting power management caps failed\n"); - rsi_dbg(INFO_ZONE, "***** RSI module shut down *****\n"); } From ac23d7f41426eded0ff81038bc8540da7f3211b1 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 19 May 2023 10:18:25 -0700 Subject: [PATCH 069/509] watchdog/perf: define dummy watchdog_update_hrtimer_threshold() on correct config [ Upstream commit 5e008df11c55228a86a1bae692cc2002503572c9 ] Patch series "watchdog/hardlockup: Add the buddy hardlockup detector", v5. This patch series adds the "buddy" hardlockup detector. In brief, the buddy hardlockup detector can detect hardlockups without arch-level support by having CPUs checkup on a "buddy" CPU periodically. Given the new design of this patch series, testing all combinations is fairly difficult. I've attempted to make sure that all combinations of CONFIG_ options are good, but it wouldn't surprise me if I missed something. I apologize in advance and I'll do my best to fix any problems that are found. This patch (of 18): The real watchdog_update_hrtimer_threshold() is defined in kernel/watchdog_hld.c. That file is included if CONFIG_HARDLOCKUP_DETECTOR_PERF and the function is defined in that file if CONFIG_HARDLOCKUP_CHECK_TIMESTAMP. The dummy version of the function in "nmi.h" didn't get that quite right. While this doesn't appear to be a huge deal, it's nice to make it consistent. It doesn't break builds because CHECK_TIMESTAMP is only defined by x86 so others don't get a double definition, and x86 uses perf lockup detector, so it gets the out of line version. Link: https://lkml.kernel.org/r/20230519101840.v5.18.Ia44852044cdcb074f387e80df6b45e892965d4a1@changeid Link: https://lkml.kernel.org/r/20230519101840.v5.1.I8cbb2f4fa740528fcfade4f5439b6cdcdd059251@changeid Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Signed-off-by: Douglas Anderson Reviewed-by: Nicholas Piggin Reviewed-by: Petr Mladek Cc: Andi Kleen Cc: Catalin Marinas Cc: Chen-Yu Tsai Cc: Christophe Leroy Cc: Daniel Thompson Cc: "David S. Miller" Cc: Guenter Roeck Cc: Ian Rogers Cc: Lecopzer Chen Cc: Marc Zyngier Cc: Mark Rutland Cc: Masayoshi Mizuma Cc: Matthias Kaehlcke Cc: Michael Ellerman Cc: Pingfan Liu Cc: Randy Dunlap Cc: "Ravi V. Shankar" Cc: Ricardo Neri Cc: Stephane Eranian Cc: Stephen Boyd Cc: Sumit Garg Cc: Tzung-Bi Shih Cc: Will Deacon Cc: Colin Cross Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f700ff2df074..0db377ff8f60 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -197,7 +197,7 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ - defined(CONFIG_HARDLOCKUP_DETECTOR) + defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) void watchdog_update_hrtimer_threshold(u64 period); #else static inline void watchdog_update_hrtimer_threshold(u64 period) { } From fdb07728d8ffa0f2f86767f3ef3d1bbe2e8270cb Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 19 May 2023 10:18:26 -0700 Subject: [PATCH 070/509] watchdog/perf: more properly prevent false positives with turbo modes [ Upstream commit 4379e59fe5665cfda737e45b8bf2f05321ef049c ] Currently, in the watchdog_overflow_callback() we first check to see if the watchdog had been touched and _then_ we handle the workaround for turbo mode. This order should be reversed. Specifically, "touching" the hardlockup detector's watchdog should avoid lockups being detected for one period that should be roughly the same regardless of whether we're running turbo or not. That means that we should do the extra accounting for turbo _before_ we look at (and clear) the global indicating that we've been touched. NOTE: this fix is made based on code inspection. I am not aware of any reports where the old code would have generated false positives. That being said, this order seems more correct and also makes it easier down the line to share code with the "buddy" hardlockup detector. Link: https://lkml.kernel.org/r/20230519101840.v5.2.I843b0d1de3e096ba111a179f3adb16d576bef5c7@changeid Fixes: 7edaeb6841df ("kernel/watchdog: Prevent false positives with turbo modes") Signed-off-by: Douglas Anderson Cc: Andi Kleen Cc: Catalin Marinas Cc: Chen-Yu Tsai Cc: Christophe Leroy Cc: Colin Cross Cc: Daniel Thompson Cc: "David S. Miller" Cc: Guenter Roeck Cc: Ian Rogers Cc: Lecopzer Chen Cc: Marc Zyngier Cc: Mark Rutland Cc: Masayoshi Mizuma Cc: Matthias Kaehlcke Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Petr Mladek Cc: Pingfan Liu Cc: Randy Dunlap Cc: "Ravi V. Shankar" Cc: Ricardo Neri Cc: Stephane Eranian Cc: Stephen Boyd Cc: Sumit Garg Cc: Tzung-Bi Shih Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/watchdog_hld.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 247bf0b1582c..1e8a49dc956e 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -114,14 +114,14 @@ static void watchdog_overflow_callback(struct perf_event *event, /* Ensure the watchdog never gets throttled */ event->hw.interrupts = 0; + if (!watchdog_check_timestamp()) + return; + if (__this_cpu_read(watchdog_nmi_touch) == true) { __this_cpu_write(watchdog_nmi_touch, false); return; } - if (!watchdog_check_timestamp()) - return; - /* check for a hardlockup * This is done by making sure our timer interrupt * is incrementing. The timer interrupt should have From 6cb477e7226bbceb9ca3f7fc306cdadf3eb92e7a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 27 May 2023 20:34:34 +0800 Subject: [PATCH 071/509] kexec: fix a memory leak in crash_shrink_memory() [ Upstream commit 1cba6c4309f03de570202c46f03df3f73a0d4c82 ] Patch series "kexec: enable kexec_crash_size to support two crash kernel regions". When crashkernel=X fails to reserve region under 4G, it will fall back to reserve region above 4G and a region of the default size will also be reserved under 4G. Unfortunately, /sys/kernel/kexec_crash_size only supports one crash kernel region now, the user cannot sense the low memory reserved by reading /sys/kernel/kexec_crash_size. Also, low memory cannot be freed by writing this file. For example: resource_size(crashk_res) = 512M resource_size(crashk_low_res) = 256M The result of 'cat /sys/kernel/kexec_crash_size' is 512M, but it should be 768M. When we execute 'echo 0 > /sys/kernel/kexec_crash_size', the size of crashk_res becomes 0 and resource_size(crashk_low_res) is still 256 MB, which is incorrect. Since crashk_res manages the memory with high address and crashk_low_res manages the memory with low address, crashk_low_res is shrunken only when all crashk_res is shrunken. And because when there is only one crash kernel region, crashk_res is always used. Therefore, if all crashk_res is shrunken and crashk_low_res still exists, swap them. This patch (of 6): If the value of parameter 'new_size' is in the semi-open and semi-closed interval (crashk_res.end - KEXEC_CRASH_MEM_ALIGN + 1, crashk_res.end], the calculation result of ram_res is: ram_res->start = crashk_res.end + 1 ram_res->end = crashk_res.end The operation of insert_resource() fails, and ram_res is not added to iomem_resource. As a result, the memory of the control block ram_res is leaked. In fact, on all architectures, the start address and size of crashk_res are already aligned by KEXEC_CRASH_MEM_ALIGN. Therefore, we do not need to round up crashk_res.start again. Instead, we should round up 'new_size' in advance. Link: https://lkml.kernel.org/r/20230527123439.772-1-thunder.leizhen@huawei.com Link: https://lkml.kernel.org/r/20230527123439.772-2-thunder.leizhen@huawei.com Fixes: 6480e5a09237 ("kdump: add missing RAM resource in crash_shrink_memory()") Fixes: 06a7f711246b ("kexec: premit reduction of the reserved memory size") Signed-off-by: Zhen Lei Acked-by: Baoquan He Cc: Cong Wang Cc: Eric W. Biederman Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/kexec_core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 7a8104d48997..3a37fc62dc95 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1029,6 +1029,7 @@ int crash_shrink_memory(unsigned long new_size) start = crashk_res.start; end = crashk_res.end; old_size = (end == 0) ? 0 : end - start + 1; + new_size = roundup(new_size, KEXEC_CRASH_MEM_ALIGN); if (new_size >= old_size) { ret = (new_size == old_size) ? 0 : -EINVAL; goto unlock; @@ -1040,9 +1041,7 @@ int crash_shrink_memory(unsigned long new_size) goto unlock; } - start = roundup(start, KEXEC_CRASH_MEM_ALIGN); - end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); - + end = start + new_size; crash_free_reserved_phys_range(end, crashk_res.end); if ((start == end) && (crashk_res.parent != NULL)) From 6b22c2c649a17b6ef3aad3b484c6b88800528d8e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 22:27:04 +0200 Subject: [PATCH 072/509] memstick r592: make memstick_debug_get_tpc_name() static [ Upstream commit 434587df9f7fd68575f99a889cc5f2efc2eaee5e ] There are no other files referencing this function, apparently it was left global to avoid an 'unused function' warning when the only caller is left out. With a 'W=1' build, it causes a 'missing prototype' warning though: drivers/memstick/host/r592.c:47:13: error: no previous prototype for 'memstick_debug_get_tpc_name' [-Werror=missing-prototypes] Annotate the function as 'static __maybe_unused' to avoid both problems. Fixes: 926341250102 ("memstick: add driver for Ricoh R5C592 card reader") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230516202714.560929-1-arnd@kernel.org Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/memstick/host/r592.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index dd06c18495eb..0e37c6a5ee36 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -44,12 +44,10 @@ static const char *tpc_names[] = { * memstick_debug_get_tpc_name - debug helper that returns string for * a TPC number */ -const char *memstick_debug_get_tpc_name(int tpc) +static __maybe_unused const char *memstick_debug_get_tpc_name(int tpc) { return tpc_names[tpc-1]; } -EXPORT_SYMBOL(memstick_debug_get_tpc_name); - /* Read a register*/ static inline u32 r592_read_reg(struct r592_device *dev, int address) From 581401cd3cf9f9ebffd6b65769fe016007111092 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 9 Jun 2023 11:37:44 +0200 Subject: [PATCH 073/509] wifi: ath9k: Fix possible stall on ath9k_txq_list_has_key() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 75086cc6dee046e3fbb3dba148b376d8802f83bc ] On EDMA capable hardware, ath9k_txq_list_has_key() can enter infinite loop if it is called while all txq_fifos have packets that use different key that the one we are looking for. Fix it by exiting the loop if all txq_fifos have been checked already. Because this loop is called under spin_lock_bh() (see ath_txq_lock) it causes the following rcu stall: rcu: INFO: rcu_sched self-detected stall on CPU ath10k_pci 0000:01:00.0: failed to read temperature -11 rcu: 1-....: (5254 ticks this GP) idle=189/1/0x4000000000000002 softirq=8442983/8442984 fqs=2579 (t=5257 jiffies g=17983297 q=334) Task dump for CPU 1: task:hostapd state:R running task stack: 0 pid: 297 ppid: 289 flags:0x0000000a Call trace: dump_backtrace+0x0/0x170 show_stack+0x1c/0x24 sched_show_task+0x140/0x170 dump_cpu_task+0x48/0x54 rcu_dump_cpu_stacks+0xf0/0x134 rcu_sched_clock_irq+0x8d8/0x9fc update_process_times+0xa0/0xec tick_sched_timer+0x5c/0xd0 __hrtimer_run_queues+0x154/0x320 hrtimer_interrupt+0x120/0x2f0 arch_timer_handler_virt+0x38/0x44 handle_percpu_devid_irq+0x9c/0x1e0 handle_domain_irq+0x64/0x90 gic_handle_irq+0x78/0xb0 call_on_irq_stack+0x28/0x38 do_interrupt_handler+0x54/0x5c el1_interrupt+0x2c/0x4c el1h_64_irq_handler+0x14/0x1c el1h_64_irq+0x74/0x78 ath9k_txq_has_key+0x1bc/0x250 [ath9k] ath9k_set_key+0x1cc/0x3dc [ath9k] drv_set_key+0x78/0x170 ieee80211_key_replace+0x564/0x6cc ieee80211_key_link+0x174/0x220 ieee80211_add_key+0x11c/0x300 nl80211_new_key+0x12c/0x330 genl_family_rcv_msg_doit+0xbc/0x11c genl_rcv_msg+0xd8/0x1c4 netlink_rcv_skb+0x40/0x100 genl_rcv+0x3c/0x50 netlink_unicast+0x1ec/0x2c0 netlink_sendmsg+0x198/0x3c0 ____sys_sendmsg+0x210/0x250 ___sys_sendmsg+0x78/0xc4 __sys_sendmsg+0x4c/0x90 __arm64_sys_sendmsg+0x28/0x30 invoke_syscall.constprop.0+0x60/0x100 do_el0_svc+0x48/0xd0 el0_svc+0x14/0x50 el0t_64_sync_handler+0xa8/0xb0 el0t_64_sync+0x158/0x15c This rcu stall is hard to reproduce as is, but changing ATH_TXFIFO_DEPTH from 8 to 2 makes it reasonably easy to reproduce. Fixes: ca2848022c12 ("ath9k: Postpone key cache entry deletion for TXQ frames reference it") Signed-off-by: Remi Pommarel Tested-by: Nicolas Escande Acked-by: Toke HĆøiland-JĆørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230609093744.1985-1-repk@triplefau.lt Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index ac354dfc5055..2bd4d295c9bd 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -850,7 +850,7 @@ static bool ath9k_txq_list_has_key(struct list_head *txq_list, u32 keyix) static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix) { struct ath_hw *ah = sc->sc_ah; - int i; + int i, j; struct ath_txq *txq; bool key_in_use = false; @@ -868,8 +868,9 @@ static bool ath9k_txq_has_key(struct ath_softc *sc, u32 keyix) if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { int idx = txq->txq_tailidx; - while (!key_in_use && - !list_empty(&txq->txq_fifo[idx])) { + for (j = 0; !key_in_use && + !list_empty(&txq->txq_fifo[idx]) && + j < ATH_TXFIFO_DEPTH; j++) { key_in_use = ath9k_txq_list_has_key( &txq->txq_fifo[idx], keyix); INCR(idx, ATH_TXFIFO_DEPTH); From 2715617c2aad5092c7cb9c75020d2af051f9e664 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 11 Jun 2023 13:51:08 +0300 Subject: [PATCH 074/509] rtnetlink: extend RTEXT_FILTER_SKIP_STATS to IFLA_VF_INFO [ Upstream commit fa0e21fa44438a0e856d42224bfa24641d37b979 ] This filter already exists for excluding IPv6 SNMP stats. Extend its definition to also exclude IFLA_VF_INFO stats in RTM_GETLINK. This patch constitutes a partial fix for a netlink attribute nesting overflow bug in IFLA_VFINFO_LIST. By excluding the stats when the requester doesn't need them, the truncation of the VF list is avoided. While it was technically only the stats added in commit c5a9f6f0ab40 ("net/core: Add drop counters to VF statistics") breaking the camel's back, the appreciable size of the stats data should never have been included without due consideration for the maximum number of VFs supported by PCI. Fixes: 3b766cd83232 ("net/core: Add reading VF statistics through the PF netdevice") Fixes: c5a9f6f0ab40 ("net/core: Add drop counters to VF statistics") Signed-off-by: Edwin Peer Cc: Edwin Peer Signed-off-by: Gal Pressman Link: https://lore.kernel.org/r/20230611105108.122586-1-gal@nvidia.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 96 +++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 45 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3c9c2d6e3b92..888ff53c8144 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -929,24 +929,27 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + - nla_total_size(0) + /* nest IFLA_VF_STATS */ - /* IFLA_VF_STATS_RX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_PACKETS */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_BYTES */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_BROADCAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_MULTICAST */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_RX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + - /* IFLA_VF_STATS_TX_DROPPED */ - nla_total_size_64bit(sizeof(__u64)) + nla_total_size(sizeof(struct ifla_vf_trust))); + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + size += num_vfs * + (nla_total_size(0) + /* nest IFLA_VF_STATS */ + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_DROPPED */ + nla_total_size_64bit(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_DROPPED */ + nla_total_size_64bit(sizeof(__u64))); + } return size; } else return 0; @@ -1221,7 +1224,8 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb, static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct net_device *dev, int vfs_num, - struct nlattr *vfinfo) + struct nlattr *vfinfo, + u32 ext_filter_mask) { struct ifla_vf_rss_query_en vf_rss_query_en; struct nlattr *vf, *vfstats, *vfvlanlist; @@ -1327,33 +1331,35 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, goto nla_put_vf_failure; } nla_nest_end(skb, vfvlanlist); - memset(&vf_stats, 0, sizeof(vf_stats)); - if (dev->netdev_ops->ndo_get_vf_stats) - dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, - &vf_stats); - vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); - if (!vfstats) - goto nla_put_vf_failure; - if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, - vf_stats.rx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, - vf_stats.tx_packets, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, - vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, - vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, - vf_stats.broadcast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, - vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || - nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, - vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { - nla_nest_cancel(skb, vfstats); - goto nla_put_vf_failure; + if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, + &vf_stats); + vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); + if (!vfstats) + goto nla_put_vf_failure; + if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, + vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || + nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, + vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfstats); } - nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; @@ -1386,7 +1392,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, return -EMSGSIZE; for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask)) return -EMSGSIZE; } From 4e321c18ef9252af98f1910390862421843f4d63 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Jun 2023 12:41:22 +0300 Subject: [PATCH 075/509] wifi: iwlwifi: pull from TXQs with softirqs disabled [ Upstream commit 96fb6f47db24a712d650b0a9b9074873f273fb0e ] In mac80211, it's required that we pull from TXQs by calling ieee80211_tx_dequeue() only with softirqs disabled. However, in iwl_mvm_queue_state_change() we're often called with them enabled, e.g. from flush if anything was flushed, triggering a mac80211 warning. Fix that by disabling the softirqs across the TX call. Fixes: cfbc6c4c5b91 ("iwlwifi: mvm: support mac80211 TXQs model") Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230614123446.0feef7fa81db.I4dd62542d955b40dd8f0af34fa4accb9d0d17c7e@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 7c61d179895b..5b173f21e87b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1174,8 +1174,11 @@ static void iwl_mvm_queue_state_change(struct iwl_op_mode *op_mode, mvmtxq = iwl_mvm_txq_from_mac80211(txq); mvmtxq->stopped = !start; - if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) + if (start && mvmsta->sta_state != IEEE80211_STA_NOTEXIST) { + local_bh_disable(); iwl_mvm_mac_itxq_xmit(mvm->hw, txq); + local_bh_enable(); + } } out: From 150ca0768b508ca255796b7800e73032006ceaa7 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 16 Jun 2023 09:54:03 +0300 Subject: [PATCH 076/509] wifi: cfg80211: rewrite merging of inherited elements [ Upstream commit dfd9aa3e7a456d57b18021d66472ab7ff8373ab7 ] The cfg80211_gen_new_ie function merges the IEs using inheritance rules. Rewrite this function to fix issues around inheritance rules. In particular, vendor elements do not require any special handling, as they are either all inherited or overridden by the subprofile. Also, add fragmentation handling as this may be needed in some cases. This also changes the function to not require making a copy. The new version could be optimized a bit by explicitly tracking which IEs have been handled already rather than looking that up again every time. Note that a small behavioural change is the removal of the SSID special handling. This should be fine for the MBSSID element, as the SSID must be included in the subelement. Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Benjamin Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230616094949.bc6152e146db.I2b5f3bc45085e1901e5b5192a674436adaf94748@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 217 +++++++++++++++++++++++++------------------- 1 file changed, 126 insertions(+), 91 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d09dabae5627..671c7f83d5fc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -262,117 +262,152 @@ bool cfg80211_is_element_inherited(const struct element *elem, } EXPORT_SYMBOL(cfg80211_is_element_inherited); -static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, - const u8 *subelement, size_t subie_len, - u8 *new_ie, gfp_t gfp) +static size_t cfg80211_copy_elem_with_frags(const struct element *elem, + const u8 *ie, size_t ie_len, + u8 **pos, u8 *buf, size_t buf_len) { - u8 *pos, *tmp; - const u8 *tmp_old, *tmp_new; - const struct element *non_inherit_elem; - u8 *sub_copy; - - /* copy subelement as we need to change its content to - * mark an ie after it is processed. - */ - sub_copy = kmemdup(subelement, subie_len, gfp); - if (!sub_copy) + if (WARN_ON((u8 *)elem < ie || elem->data > ie + ie_len || + elem->data + elem->datalen > ie + ie_len)) return 0; - pos = &new_ie[0]; + if (elem->datalen + 2 > buf + buf_len - *pos) + return 0; - /* set new ssid */ - tmp_new = cfg80211_find_ie(WLAN_EID_SSID, sub_copy, subie_len); - if (tmp_new) { - memcpy(pos, tmp_new, tmp_new[1] + 2); - pos += (tmp_new[1] + 2); + memcpy(*pos, elem, elem->datalen + 2); + *pos += elem->datalen + 2; + + /* Finish if it is not fragmented */ + if (elem->datalen != 255) + return *pos - buf; + + ie_len = ie + ie_len - elem->data - elem->datalen; + ie = (const u8 *)elem->data + elem->datalen; + + for_each_element(elem, ie, ie_len) { + if (elem->id != WLAN_EID_FRAGMENT) + break; + + if (elem->datalen + 2 > buf + buf_len - *pos) + return 0; + + memcpy(*pos, elem, elem->datalen + 2); + *pos += elem->datalen + 2; + + if (elem->datalen != 255) + break; } - /* get non inheritance list if exists */ - non_inherit_elem = - cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, - sub_copy, subie_len); + return *pos - buf; +} - /* go through IEs in ie (skip SSID) and subelement, - * merge them into new_ie +static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen, + const u8 *subie, size_t subie_len, + u8 *new_ie, size_t new_ie_len) +{ + const struct element *non_inherit_elem, *parent, *sub; + u8 *pos = new_ie; + u8 id, ext_id; + unsigned int match_len; + + non_inherit_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, + subie, subie_len); + + /* We copy the elements one by one from the parent to the generated + * elements. + * If they are not inherited (included in subie or in the non + * inheritance element), then we copy all occurrences the first time + * we see this element type. */ - tmp_old = cfg80211_find_ie(WLAN_EID_SSID, ie, ielen); - tmp_old = (tmp_old) ? tmp_old + tmp_old[1] + 2 : ie; + for_each_element(parent, ie, ielen) { + if (parent->id == WLAN_EID_FRAGMENT) + continue; + + if (parent->id == WLAN_EID_EXTENSION) { + if (parent->datalen < 1) + continue; + + id = WLAN_EID_EXTENSION; + ext_id = parent->data[0]; + match_len = 1; + } else { + id = parent->id; + match_len = 0; + } + + /* Find first occurrence in subie */ + sub = cfg80211_find_elem_match(id, subie, subie_len, + &ext_id, match_len, 0); + + /* Copy from parent if not in subie and inherited */ + if (!sub && + cfg80211_is_element_inherited(parent, non_inherit_elem)) { + if (!cfg80211_copy_elem_with_frags(parent, + ie, ielen, + &pos, new_ie, + new_ie_len)) + return 0; - while (tmp_old + 2 - ie <= ielen && - tmp_old + tmp_old[1] + 2 - ie <= ielen) { - if (tmp_old[0] == 0) { - tmp_old++; continue; } - if (tmp_old[0] == WLAN_EID_EXTENSION) - tmp = (u8 *)cfg80211_find_ext_ie(tmp_old[2], sub_copy, - subie_len); - else - tmp = (u8 *)cfg80211_find_ie(tmp_old[0], sub_copy, - subie_len); + /* Already copied if an earlier element had the same type */ + if (cfg80211_find_elem_match(id, ie, (u8 *)parent - ie, + &ext_id, match_len, 0)) + continue; - if (!tmp) { - const struct element *old_elem = (void *)tmp_old; + /* Not inheriting, copy all similar elements from subie */ + while (sub) { + if (!cfg80211_copy_elem_with_frags(sub, + subie, subie_len, + &pos, new_ie, + new_ie_len)) + return 0; - /* ie in old ie but not in subelement */ - if (cfg80211_is_element_inherited(old_elem, - non_inherit_elem)) { - memcpy(pos, tmp_old, tmp_old[1] + 2); - pos += tmp_old[1] + 2; - } - } else { - /* ie in transmitting ie also in subelement, - * copy from subelement and flag the ie in subelement - * as copied (by setting eid field to WLAN_EID_SSID, - * which is skipped anyway). - * For vendor ie, compare OUI + type + subType to - * determine if they are the same ie. - */ - if (tmp_old[0] == WLAN_EID_VENDOR_SPECIFIC) { - if (tmp_old[1] >= 5 && tmp[1] >= 5 && - !memcmp(tmp_old + 2, tmp + 2, 5)) { - /* same vendor ie, copy from - * subelement - */ - memcpy(pos, tmp, tmp[1] + 2); - pos += tmp[1] + 2; - tmp[0] = WLAN_EID_SSID; - } else { - memcpy(pos, tmp_old, tmp_old[1] + 2); - pos += tmp_old[1] + 2; - } - } else { - /* copy ie from subelement into new ie */ - memcpy(pos, tmp, tmp[1] + 2); - pos += tmp[1] + 2; - tmp[0] = WLAN_EID_SSID; - } + sub = cfg80211_find_elem_match(id, + sub->data + sub->datalen, + subie_len + subie - + (sub->data + + sub->datalen), + &ext_id, match_len, 0); } - - if (tmp_old + tmp_old[1] + 2 - ie == ielen) - break; - - tmp_old += tmp_old[1] + 2; } - /* go through subelement again to check if there is any ie not - * copied to new ie, skip ssid, capability, bssid-index ie + /* The above misses elements that are included in subie but not in the + * parent, so do a pass over subie and append those. + * Skip the non-tx BSSID caps and non-inheritance element. */ - tmp_new = sub_copy; - while (tmp_new + 2 - sub_copy <= subie_len && - tmp_new + tmp_new[1] + 2 - sub_copy <= subie_len) { - if (!(tmp_new[0] == WLAN_EID_NON_TX_BSSID_CAP || - tmp_new[0] == WLAN_EID_SSID)) { - memcpy(pos, tmp_new, tmp_new[1] + 2); - pos += tmp_new[1] + 2; + for_each_element(sub, subie, subie_len) { + if (sub->id == WLAN_EID_NON_TX_BSSID_CAP) + continue; + + if (sub->id == WLAN_EID_FRAGMENT) + continue; + + if (sub->id == WLAN_EID_EXTENSION) { + if (sub->datalen < 1) + continue; + + id = WLAN_EID_EXTENSION; + ext_id = sub->data[0]; + match_len = 1; + + if (ext_id == WLAN_EID_EXT_NON_INHERITANCE) + continue; + } else { + id = sub->id; + match_len = 0; } - if (tmp_new + tmp_new[1] + 2 - sub_copy == subie_len) - break; - tmp_new += tmp_new[1] + 2; + + /* Processed if one was included in the parent */ + if (cfg80211_find_elem_match(id, ie, ielen, + &ext_id, match_len, 0)) + continue; + + if (!cfg80211_copy_elem_with_frags(sub, subie, subie_len, + &pos, new_ie, new_ie_len)) + return 0; } - kfree(sub_copy); return pos - new_ie; } @@ -2170,7 +2205,7 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy, new_ie_len = cfg80211_gen_new_ie(ie, ielen, profile, profile_len, new_ie, - gfp); + IEEE80211_MAX_DATA_LEN); if (!new_ie_len) continue; From 660d4e73efb04dddd094f5a13b7a3bb2dbe2c6b6 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 13 Jun 2023 16:46:55 +0300 Subject: [PATCH 077/509] wifi: ath9k: convert msecs to jiffies where needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2aa083acea9f61be3280184384551178f510ff51 ] Since 'ieee80211_queue_delayed_work()' expects timeout in jiffies and not milliseconds, 'msecs_to_jiffies()' should be used in 'ath_restart_work()' and '__ath9k_flush()'. Fixes: d63ffc45c5d3 ("ath9k: rename tx_complete_work to hw_check_work") Signed-off-by: Dmitry Antipov Acked-by: Toke HĆøiland-JĆørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230613134655.248728-1-dmantipov@yandex.ru Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 2bd4d295c9bd..b2cfc483515c 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -203,7 +203,7 @@ void ath_cancel_work(struct ath_softc *sc) void ath_restart_work(struct ath_softc *sc) { ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work, - ATH_HW_CHECK_POLL_INT); + msecs_to_jiffies(ATH_HW_CHECK_POLL_INT)); if (AR_SREV_9340(sc->sc_ah) || AR_SREV_9330(sc->sc_ah)) ieee80211_queue_delayed_work(sc->hw, &sc->hw_pll_work, @@ -2244,7 +2244,7 @@ void __ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop, } ieee80211_queue_delayed_work(hw, &sc->hw_check_work, - ATH_HW_CHECK_POLL_INT); + msecs_to_jiffies(ATH_HW_CHECK_POLL_INT)); } static bool ath9k_tx_frames_pending(struct ieee80211_hw *hw) From 1ba91ffa1a0ed4cd134c5bf43508f797a7ea9dd0 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:29 -0700 Subject: [PATCH 078/509] igc: Fix race condition in PTP tx code [ Upstream commit 9c50e2b150c8ee0eee5f8154e2ad168cdd748877 ] Currently, the igc driver supports timestamping only one tx packet at a time. During the transmission flow, the skb that requires hardware timestamping is saved in adapter->ptp_tx_skb. Once hardware has the timestamp, an interrupt is delivered, and adapter->ptp_tx_work is scheduled. In igc_ptp_tx_work(), we read the timestamp register, update adapter->ptp_tx_skb, and notify the network stack. While the thread executing the transmission flow (the user process running in kernel mode) and the thread executing ptp_tx_work don't access adapter->ptp_tx_skb concurrently, there are two other places where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and igc_ptp_suspend(). igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker thread which runs periodically so it is possible we have two threads accessing ptp_tx_skb at the same time. Consider the following scenario: right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(), igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been written yet, this is considered a timeout and adapter->ptp_tx_skb is cleaned up. This patch fixes the issue described above by adding the ptp_tx_lock to protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter. Since igc_xmit_frame_ring() called in atomic context by the networking stack, ptp_tx_lock is defined as a spinlock, and the irq safe variants of lock/unlock are used. With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS flag doesn't provide much of a use anymore so this patch gets rid of it. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc.h | 5 +- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 57 ++++++++++++----------- 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 47ba1eafcdc7..33f64c80335d 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -210,6 +210,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -389,7 +393,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 72d7d2cf126d..a15e4b6d7fa4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1467,9 +1467,10 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, * the other timer registers before skipping the * timestamping request. */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1478,6 +1479,8 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /* record initial flags and protocol */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index ef53f7665b58..25b238c6a675 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -323,6 +323,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -330,7 +331,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(IGC_TXSTMPH); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -338,20 +338,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (!adapter->ptp_tx_skb) + goto unlock; + + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -361,6 +361,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { @@ -396,13 +398,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. - */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -413,24 +409,33 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) * igc_ptp_tx_work * @work: pointer to work struct * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * This work function checks the TSYNCTXCTL valid bit to determine when + * a timestamp has been taken for the current stored skb. */ static void igc_ptp_tx_work(struct work_struct *work) { struct igc_adapter *adapter = container_of(work, struct igc_adapter, ptp_tx_work); struct igc_hw *hw = &adapter->hw; + unsigned long flags; u32 tsynctxctl; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + if (!adapter->ptp_tx_skb) + goto unlock; tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (!tsynctxctl) { + WARN_ONCE(1, "Received a TSTAMP interrupt but no TSTAMP is ready.\n"); + goto unlock; + } igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -506,6 +511,7 @@ void igc_ptp_init(struct igc_adapter *adapter) return; } + spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); @@ -559,7 +565,6 @@ void igc_ptp_suspend(struct igc_adapter *adapter) cancel_work_sync(&adapter->ptp_tx_work); dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); if (pci_device_is_present(adapter->pdev)) igc_ptp_time_save(adapter); From 0c9e48428f6b3916a04eeb98a6394558ced0b6d2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Jun 2023 15:55:37 +0200 Subject: [PATCH 079/509] net: stmmac: fix double serdes powerdown [ Upstream commit c4fc88ad2a765224a648db8ab35f125e120fe41b ] Commit 49725ffc15fc ("net: stmmac: power up/down serdes in stmmac_open/release") correctly added a call to the serdes_powerdown() callback to stmmac_release() but did not remove the one from stmmac_remove() which leads to a doubled call to serdes_powerdown(). This can lead to all kinds of problems: in the case of the qcom ethqos driver, it caused an unbalanced regulator disable splat. Fixes: 49725ffc15fc ("net: stmmac: power up/down serdes in stmmac_open/release") Signed-off-by: Bartosz Golaszewski Reviewed-by: Jiri Pirko Acked-by: Junxiao Chang Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney Link: https://lore.kernel.org/r/20230621135537.376649-1-brgl@bgdev.pl Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index de66406c5057..83e9a4d019c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5254,12 +5254,6 @@ int stmmac_dvr_remove(struct device *dev) netif_carrier_off(ndev); unregister_netdev(ndev); - /* Serdes power down needs to happen after VLAN filter - * is deleted that is triggered by unregister_netdev(). - */ - if (priv->plat->serdes_powerdown) - priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); - #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif From cde7b90e0539a3b11da377e463dfd2288a162dbf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Jun 2023 15:43:37 +0000 Subject: [PATCH 080/509] netlink: fix potential deadlock in netlink_set_err() [ Upstream commit 8d61f926d42045961e6b65191c09e3678d86a9cf ] syzbot reported a possible deadlock in netlink_set_err() [1] A similar issue was fixed in commit 1d482e666b8e ("netlink: disable IRQs for netlink_lock_table()") in netlink_lock_table() This patch adds IRQ safety to netlink_set_err() and __netlink_diag_dump() which were not covered by cited commit. [1] WARNING: possible irq lock inversion dependency detected 6.4.0-rc6-syzkaller-00240-g4e9f0ec38852 #0 Not tainted syz-executor.2/23011 just changed the state of lock: ffffffff8e1a7a58 (nl_table_lock){.+.?}-{2:2}, at: netlink_set_err+0x2e/0x3a0 net/netlink/af_netlink.c:1612 but this lock was taken by another, SOFTIRQ-safe lock in the past: (&local->queue_stop_reason_lock){..-.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(nl_table_lock); local_irq_disable(); lock(&local->queue_stop_reason_lock); lock(nl_table_lock); lock(&local->queue_stop_reason_lock); *** DEADLOCK *** Fixes: 1d482e666b8e ("netlink: disable IRQs for netlink_lock_table()") Reported-by: syzbot+a7d200a347f912723e5c@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=a7d200a347f912723e5c Link: https://lore.kernel.org/netdev/000000000000e38d1605fea5747e@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Johannes Berg Link: https://lore.kernel.org/r/20230621154337.1668594-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/netlink/af_netlink.c | 5 +++-- net/netlink/diag.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 99c869d8d304..9737c3229c12 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1602,6 +1602,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; + unsigned long flags; struct sock *sk; int ret = 0; @@ -1611,12 +1612,12 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) /* sk->sk_err wants a positive error value */ info.code = -code; - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list) ret += do_one_set_err(sk, &info); - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); return ret; } EXPORT_SYMBOL(netlink_set_err); diff --git a/net/netlink/diag.c b/net/netlink/diag.c index c6255eac305c..4143b2ea4195 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -94,6 +94,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; + unsigned long flags; struct sock *sk; int num = 2; int ret = 0; @@ -152,7 +153,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num++; mc_list: - read_lock(&nl_table_lock); + read_lock_irqsave(&nl_table_lock, flags); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -173,7 +174,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, } num++; } - read_unlock(&nl_table_lock); + read_unlock_irqrestore(&nl_table_lock, flags); done: cb->args[0] = num; From 44db85c6e1a184b99a2cdf56b525ac63c4962c22 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Jun 2023 17:47:20 +0000 Subject: [PATCH 081/509] netlink: do not hard code device address lenth in fdb dumps [ Upstream commit aa5406950726e336c5c9585b09799a734b6e77bf ] syzbot reports that some netdev devices do not have a six bytes address [1] Replace ETH_ALEN by dev->addr_len. [1] (Case of a device where dev->addr_len = 4) BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in copyout+0xb8/0x100 lib/iov_iter.c:169 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copyout+0xb8/0x100 lib/iov_iter.c:169 _copy_to_iter+0x6d8/0x1d00 lib/iov_iter.c:536 copy_to_iter include/linux/uio.h:206 [inline] simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:513 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:419 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:527 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] netlink_recvmsg+0x4ae/0x15a0 net/netlink/af_netlink.c:1970 sock_recvmsg_nosec net/socket.c:1019 [inline] sock_recvmsg net/socket.c:1040 [inline] ____sys_recvmsg+0x283/0x7f0 net/socket.c:2722 ___sys_recvmsg+0x223/0x840 net/socket.c:2764 do_recvmmsg+0x4f9/0xfd0 net/socket.c:2858 __sys_recvmmsg net/socket.c:2937 [inline] __do_sys_recvmmsg net/socket.c:2960 [inline] __se_sys_recvmmsg net/socket.c:2953 [inline] __x64_sys_recvmmsg+0x397/0x490 net/socket.c:2953 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was stored to memory at: __nla_put lib/nlattr.c:1009 [inline] nla_put+0x1c6/0x230 lib/nlattr.c:1067 nlmsg_populate_fdb_fill+0x2b8/0x600 net/core/rtnetlink.c:4071 nlmsg_populate_fdb net/core/rtnetlink.c:4418 [inline] ndo_dflt_fdb_dump+0x616/0x840 net/core/rtnetlink.c:4456 rtnl_fdb_dump+0x14ff/0x1fc0 net/core/rtnetlink.c:4629 netlink_dump+0x9d1/0x1310 net/netlink/af_netlink.c:2268 netlink_recvmsg+0xc5c/0x15a0 net/netlink/af_netlink.c:1995 sock_recvmsg_nosec+0x7a/0x120 net/socket.c:1019 ____sys_recvmsg+0x664/0x7f0 net/socket.c:2720 ___sys_recvmsg+0x223/0x840 net/socket.c:2764 do_recvmmsg+0x4f9/0xfd0 net/socket.c:2858 __sys_recvmmsg net/socket.c:2937 [inline] __do_sys_recvmmsg net/socket.c:2960 [inline] __se_sys_recvmmsg net/socket.c:2953 [inline] __x64_sys_recvmmsg+0x397/0x490 net/socket.c:2953 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Uninit was created at: slab_post_alloc_hook+0x12d/0xb60 mm/slab.h:716 slab_alloc_node mm/slub.c:3451 [inline] __kmem_cache_alloc_node+0x4ff/0x8b0 mm/slub.c:3490 kmalloc_trace+0x51/0x200 mm/slab_common.c:1057 kmalloc include/linux/slab.h:559 [inline] __hw_addr_create net/core/dev_addr_lists.c:60 [inline] __hw_addr_add_ex+0x2e5/0x9e0 net/core/dev_addr_lists.c:118 __dev_mc_add net/core/dev_addr_lists.c:867 [inline] dev_mc_add+0x9a/0x130 net/core/dev_addr_lists.c:885 igmp6_group_added+0x267/0xbc0 net/ipv6/mcast.c:680 ipv6_mc_up+0x296/0x3b0 net/ipv6/mcast.c:2754 ipv6_mc_remap+0x1e/0x30 net/ipv6/mcast.c:2708 addrconf_type_change net/ipv6/addrconf.c:3731 [inline] addrconf_notify+0x4d3/0x1d90 net/ipv6/addrconf.c:3699 notifier_call_chain kernel/notifier.c:93 [inline] raw_notifier_call_chain+0xe4/0x430 kernel/notifier.c:461 call_netdevice_notifiers_info net/core/dev.c:1935 [inline] call_netdevice_notifiers_extack net/core/dev.c:1973 [inline] call_netdevice_notifiers+0x1ee/0x2d0 net/core/dev.c:1987 bond_enslave+0xccd/0x53f0 drivers/net/bonding/bond_main.c:1906 do_set_master net/core/rtnetlink.c:2626 [inline] rtnl_newlink_create net/core/rtnetlink.c:3460 [inline] __rtnl_newlink net/core/rtnetlink.c:3660 [inline] rtnl_newlink+0x378c/0x40e0 net/core/rtnetlink.c:3673 rtnetlink_rcv_msg+0x16a6/0x1840 net/core/rtnetlink.c:6395 netlink_rcv_skb+0x371/0x650 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x34/0x40 net/core/rtnetlink.c:6413 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0xf28/0x1230 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x122f/0x13d0 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg net/socket.c:747 [inline] ____sys_sendmsg+0x999/0xd50 net/socket.c:2503 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2557 __sys_sendmsg net/socket.c:2586 [inline] __do_sys_sendmsg net/socket.c:2595 [inline] __se_sys_sendmsg net/socket.c:2593 [inline] __x64_sys_sendmsg+0x304/0x490 net/socket.c:2593 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Bytes 2856-2857 of 3500 are uninitialized Memory access of size 3500 starts at ffff888018d99104 Data copied to user address 0000000020000480 Fixes: d83b06036048 ("net: add fdb generic dump routine") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230621174720.1845040-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 888ff53c8144..d3c03ebf06a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3889,7 +3889,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) + if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) @@ -3903,10 +3903,10 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, return -EMSGSIZE; } -static inline size_t rtnl_fdb_nlmsg_size(void) +static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + - nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } @@ -3918,7 +3918,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC); + skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; From 4d9cd4b330d80785c48f4e39790e6f8e2c1af834 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 22 Jun 2023 23:03:34 +0200 Subject: [PATCH 082/509] selftests: rtnetlink: remove netdevsim device after ipsec offload test [ Upstream commit 5f789f103671fec3733ebe756e56adf15c90c21d ] On systems where netdevsim is built-in or loaded before the test starts, kci_test_ipsec_offload doesn't remove the netdevsim device it created during the test. Fixes: e05b2d141fef ("netdevsim: move netdev creation/destruction to dev probe") Signed-off-by: Sabrina Dubroca Reviewed-by: Simon Horman Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/e1cb94f4f82f4eca4a444feec4488a1323396357.1687466906.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/rtnetlink.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index c3a905923ef2..cbf166df57da 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -835,6 +835,7 @@ EOF fi # clean up any leftovers + echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim if [ $ret -ne 0 ]; then From bccc7ace12e69dee4684a3bb4b69737972e570d6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 22 Jun 2023 14:32:31 -0700 Subject: [PATCH 083/509] gtp: Fix use-after-free in __gtp_encap_destroy(). [ Upstream commit ce3aee7114c575fab32a5e9e939d4bbb3dcca79f ] syzkaller reported use-after-free in __gtp_encap_destroy(). [0] It shows the same process freed sk and touched it illegally. Commit e198987e7dd7 ("gtp: fix suspicious RCU usage") added lock_sock() and release_sock() in __gtp_encap_destroy() to protect sk->sk_user_data, but release_sock() is called after sock_put() releases the last refcnt. [0]: BUG: KASAN: slab-use-after-free in instrument_atomic_read_write include/linux/instrumented.h:96 [inline] BUG: KASAN: slab-use-after-free in atomic_try_cmpxchg_acquire include/linux/atomic/atomic-instrumented.h:541 [inline] BUG: KASAN: slab-use-after-free in queued_spin_lock include/asm-generic/qspinlock.h:111 [inline] BUG: KASAN: slab-use-after-free in do_raw_spin_lock include/linux/spinlock.h:186 [inline] BUG: KASAN: slab-use-after-free in __raw_spin_lock_bh include/linux/spinlock_api_smp.h:127 [inline] BUG: KASAN: slab-use-after-free in _raw_spin_lock_bh+0x75/0xe0 kernel/locking/spinlock.c:178 Write of size 4 at addr ffff88800dbef398 by task syz-executor.2/2401 CPU: 1 PID: 2401 Comm: syz-executor.2 Not tainted 6.4.0-rc5-01219-gfa0e21fa4443 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:351 [inline] print_report+0xcc/0x620 mm/kasan/report.c:462 kasan_report+0xb2/0xe0 mm/kasan/report.c:572 check_region_inline mm/kasan/generic.c:181 [inline] kasan_check_range+0x39/0x1c0 mm/kasan/generic.c:187 instrument_atomic_read_write include/linux/instrumented.h:96 [inline] atomic_try_cmpxchg_acquire include/linux/atomic/atomic-instrumented.h:541 [inline] queued_spin_lock include/asm-generic/qspinlock.h:111 [inline] do_raw_spin_lock include/linux/spinlock.h:186 [inline] __raw_spin_lock_bh include/linux/spinlock_api_smp.h:127 [inline] _raw_spin_lock_bh+0x75/0xe0 kernel/locking/spinlock.c:178 spin_lock_bh include/linux/spinlock.h:355 [inline] release_sock+0x1f/0x1a0 net/core/sock.c:3526 gtp_encap_disable_sock drivers/net/gtp.c:651 [inline] gtp_encap_disable+0xb9/0x220 drivers/net/gtp.c:664 gtp_dev_uninit+0x19/0x50 drivers/net/gtp.c:728 unregister_netdevice_many_notify+0x97e/0x1520 net/core/dev.c:10841 rtnl_delete_link net/core/rtnetlink.c:3216 [inline] rtnl_dellink+0x3c0/0xb30 net/core/rtnetlink.c:3268 rtnetlink_rcv_msg+0x450/0xb10 net/core/rtnetlink.c:6423 netlink_rcv_skb+0x15d/0x450 net/netlink/af_netlink.c:2548 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x700/0x930 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x91c/0xe30 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0x1b7/0x200 net/socket.c:747 ____sys_sendmsg+0x75a/0x990 net/socket.c:2493 ___sys_sendmsg+0x11d/0x1c0 net/socket.c:2547 __sys_sendmsg+0xfe/0x1d0 net/socket.c:2576 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f1168b1fe5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007f1167edccc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004bbf80 RCX: 00007f1168b1fe5d RDX: 0000000000000000 RSI: 00000000200002c0 RDI: 0000000000000003 RBP: 00000000004bbf80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f1168b80530 R15: 0000000000000000 Allocated by task 1483: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 __kasan_slab_alloc+0x59/0x70 mm/kasan/common.c:328 kasan_slab_alloc include/linux/kasan.h:186 [inline] slab_post_alloc_hook mm/slab.h:711 [inline] slab_alloc_node mm/slub.c:3451 [inline] slab_alloc mm/slub.c:3459 [inline] __kmem_cache_alloc_lru mm/slub.c:3466 [inline] kmem_cache_alloc+0x16d/0x340 mm/slub.c:3475 sk_prot_alloc+0x5f/0x280 net/core/sock.c:2073 sk_alloc+0x34/0x6c0 net/core/sock.c:2132 inet6_create net/ipv6/af_inet6.c:192 [inline] inet6_create+0x2c7/0xf20 net/ipv6/af_inet6.c:119 __sock_create+0x2a1/0x530 net/socket.c:1535 sock_create net/socket.c:1586 [inline] __sys_socket_create net/socket.c:1623 [inline] __sys_socket_create net/socket.c:1608 [inline] __sys_socket+0x137/0x250 net/socket.c:1651 __do_sys_socket net/socket.c:1664 [inline] __se_sys_socket net/socket.c:1662 [inline] __x64_sys_socket+0x72/0xb0 net/socket.c:1662 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 2401: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x50 mm/kasan/generic.c:521 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free mm/kasan/common.c:200 [inline] __kasan_slab_free+0x10c/0x1b0 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:162 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook mm/slub.c:1807 [inline] slab_free mm/slub.c:3786 [inline] kmem_cache_free+0xb4/0x490 mm/slub.c:3808 sk_prot_free net/core/sock.c:2113 [inline] __sk_destruct+0x500/0x720 net/core/sock.c:2207 sk_destruct+0xc1/0xe0 net/core/sock.c:2222 __sk_free+0xed/0x3d0 net/core/sock.c:2233 sk_free+0x7c/0xa0 net/core/sock.c:2244 sock_put include/net/sock.h:1981 [inline] __gtp_encap_destroy+0x165/0x1b0 drivers/net/gtp.c:634 gtp_encap_disable_sock drivers/net/gtp.c:651 [inline] gtp_encap_disable+0xb9/0x220 drivers/net/gtp.c:664 gtp_dev_uninit+0x19/0x50 drivers/net/gtp.c:728 unregister_netdevice_many_notify+0x97e/0x1520 net/core/dev.c:10841 rtnl_delete_link net/core/rtnetlink.c:3216 [inline] rtnl_dellink+0x3c0/0xb30 net/core/rtnetlink.c:3268 rtnetlink_rcv_msg+0x450/0xb10 net/core/rtnetlink.c:6423 netlink_rcv_skb+0x15d/0x450 net/netlink/af_netlink.c:2548 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x700/0x930 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x91c/0xe30 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0x1b7/0x200 net/socket.c:747 ____sys_sendmsg+0x75a/0x990 net/socket.c:2493 ___sys_sendmsg+0x11d/0x1c0 net/socket.c:2547 __sys_sendmsg+0xfe/0x1d0 net/socket.c:2576 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc The buggy address belongs to the object at ffff88800dbef300 which belongs to the cache UDPv6 of size 1344 The buggy address is located 152 bytes inside of freed 1344-byte region [ffff88800dbef300, ffff88800dbef840) The buggy address belongs to the physical page: page:00000000d31bfed5 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88800dbeed40 pfn:0xdbe8 head:00000000d31bfed5 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 memcg:ffff888008ee0801 flags: 0x100000000010200(slab|head|node=0|zone=1) page_type: 0xffffffff() raw: 0100000000010200 ffff88800c7a3000 dead000000000122 0000000000000000 raw: ffff88800dbeed40 0000000080160015 00000001ffffffff ffff888008ee0801 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88800dbef280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88800dbef300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff88800dbef380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88800dbef400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88800dbef480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: e198987e7dd7 ("gtp: fix suspicious RCU usage") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Pablo Neira Ayuso Link: https://lore.kernel.org/r/20230622213231.24651-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/gtp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 1c46bc4d2705..05ea3a18552b 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -291,7 +291,9 @@ static void __gtp_encap_destroy(struct sock *sk) gtp->sk1u = NULL; udp_sk(sk)->encap_type = 0; rcu_assign_sk_user_data(sk, NULL); + release_sock(sk); sock_put(sk); + return; } release_sock(sk); } From 6ccfec84f02576c44ebe98e5f59d60c07825a460 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Thu, 22 Jun 2023 22:22:45 +0300 Subject: [PATCH 084/509] net: axienet: Move reset before 64-bit DMA detection [ Upstream commit f1bc9fc4a06de0108e0dca2a9a7e99ba1fc632f9 ] 64-bit DMA detection will fail if axienet was started before (by boot loader, boot ROM, etc). In this state axienet will not start properly. XAXIDMA_TX_CDESC_OFFSET + 4 register (MM2S_CURDESC_MSB) is used to detect 64-bit DMA capability here. But datasheet says: When DMACR.RS is 1 (axienet is in enabled state), CURDESC_PTR becomes Read Only (RO) and is used to fetch the first descriptor. So iowrite32()/ioread32() trick to this register to detect 64-bit DMA will not work. So move axienet reset before 64-bit DMA detection. Fixes: f735c40ed93c ("net: axienet: Autodetect 64-bit DMA capability") Signed-off-by: Maxim Kochetkov Reviewed-by: Robert Hancock Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20230622192245.116864-1-fido_max@inbox.ru Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 3d91baf2e55a..9d362283196a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2009,6 +2009,11 @@ static int axienet_probe(struct platform_device *pdev) goto cleanup_clk; } + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + /* Autodetect the need for 64-bit DMA pointers. * When the IP is configured for a bus width bigger than 32 bits, * writing the MSB registers is mandatory, even if they are all 0. @@ -2055,11 +2060,6 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; - /* Reset core now that clocks are enabled, prior to accessing MDIO */ - ret = __axienet_device_reset(lp); - if (ret) - goto cleanup_clk; - ret = axienet_mdio_setup(lp); if (ret) dev_warn(&pdev->dev, From cb1aa7cc562cab6a87ea33574c8c65f2d2fd7aeb Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 23 Jun 2023 15:34:48 +0100 Subject: [PATCH 085/509] sfc: fix crash when reading stats while NIC is resetting [ Upstream commit d1b355438b8325a486f087e506d412c4e852f37b ] efx_net_stats() (.ndo_get_stats64) can be called during an ethtool selftest, during which time nic_data->mc_stats is NULL as the NIC has been fini'd. In this case do not attempt to fetch the latest stats from the hardware, else we will crash on a NULL dereference: BUG: kernel NULL pointer dereference, address: 0000000000000038 RIP efx_nic_update_stats abridged calltrace: efx_ef10_update_stats_pf efx_net_stats dev_get_stats dev_seq_printf_stats Skipping the read is safe, we will simply give out stale stats. To ensure that the free in efx_ef10_fini_nic() does not race against efx_ef10_update_stats_pf(), which could cause a TOCTTOU bug, take the efx->stats_lock in fini_nic (it is already held across update_stats). Fixes: d3142c193dca ("sfc: refactor EF10 stats handling") Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/sfc/ef10.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 32654fe1f8b5..3f53b5ea7841 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1297,8 +1297,10 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; + spin_lock_bh(&efx->stats_lock); kfree(nic_data->mc_stats); nic_data->mc_stats = NULL; + spin_unlock_bh(&efx->stats_lock); } static int efx_ef10_init_nic(struct efx_nic *efx) @@ -1836,9 +1838,14 @@ static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats, efx_ef10_get_stat_mask(efx, mask); - efx_nic_copy_stats(efx, nic_data->mc_stats); - efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, - mask, stats, nic_data->mc_stats, false); + /* If NIC was fini'd (probably resetting), then we can't read + * updated stats right now. + */ + if (nic_data->mc_stats) { + efx_nic_copy_stats(efx, nic_data->mc_stats); + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, + mask, stats, nic_data->mc_stats, false); + } /* Update derived statistics */ efx_nic_fix_nodesc_drop_stat(efx, From a3a1550c4d2e5fecbd317778ef2832e933d4774b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 2 Mar 2022 20:25:19 +0100 Subject: [PATCH 086/509] nfc: llcp: simplify llcp_sock_connect() error paths [ Upstream commit ec10fd154d934cc4195da3cbd017a12817b41d51 ] The llcp_sock_connect() error paths were using a mixed way of central exit (goto) and cleanup Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller Stable-dep-of: 6709d4b7bc2e ("net: nfc: Fix use-after-free caused by nfc_llcp_find_local") Signed-off-by: Sasha Levin --- net/nfc/llcp_sock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 0b93a17b9f11..fdf0856182c6 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -712,10 +712,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->local = nfc_llcp_local_get(local); llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { - nfc_llcp_local_put(llcp_sock->local); - llcp_sock->local = NULL; ret = -ENOMEM; - goto put_dev; + goto sock_llcp_put_local; } llcp_sock->reserved_ssap = llcp_sock->ssap; @@ -760,8 +758,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, sock_llcp_release: nfc_llcp_put_ssap(local, llcp_sock->ssap); + +sock_llcp_put_local: nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; + llcp_sock->dev = NULL; put_dev: nfc_put_device(dev); From 96f2c6f272ec04083d828de46285a7d7b17d1aad Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 25 Jun 2023 17:10:07 +0800 Subject: [PATCH 087/509] net: nfc: Fix use-after-free caused by nfc_llcp_find_local [ Upstream commit 6709d4b7bc2e079241fdef15d1160581c5261c10 ] This commit fixes several use-after-free that caused by function nfc_llcp_find_local(). For example, one UAF can happen when below buggy time window occurs. // nfc_genl_llc_get_params | // nfc_unregister_device | dev = nfc_get_device(idx); | device_lock(...) if (!dev) | dev->shutting_down = true; return -ENODEV; | device_unlock(...); | device_lock(...); | // nfc_llcp_unregister_device | nfc_llcp_find_local() nfc_llcp_find_local(...); | | local_cleanup() if (!local) { | rc = -ENODEV; | // nfc_llcp_local_put goto exit; | kref_put(.., local_release) } | | // local_release | list_del(&local->list) // nfc_genl_send_params | kfree() local->dev->idx !!!UAF!!! | | and the crash trace for the one of the discussed UAF like: BUG: KASAN: slab-use-after-free in nfc_genl_llc_get_params+0x72f/0x780 net/nfc/netlink.c:1045 Read of size 8 at addr ffff888105b0e410 by task 20114 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x72/0xa0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:319 [inline] print_report+0xcc/0x620 mm/kasan/report.c:430 kasan_report+0xb2/0xe0 mm/kasan/report.c:536 nfc_genl_send_params net/nfc/netlink.c:999 [inline] nfc_genl_llc_get_params+0x72f/0x780 net/nfc/netlink.c:1045 genl_family_rcv_msg_doit.isra.0+0x1ee/0x2e0 net/netlink/genetlink.c:968 genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x503/0x7d0 net/netlink/genetlink.c:1065 netlink_rcv_skb+0x161/0x430 net/netlink/af_netlink.c:2548 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x644/0x900 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x934/0xe70 net/netlink/af_netlink.c:1913 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0x1b6/0x200 net/socket.c:747 ____sys_sendmsg+0x6e9/0x890 net/socket.c:2501 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2555 __sys_sendmsg+0xf7/0x1d0 net/socket.c:2584 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7f34640a2389 RSP: 002b:00007f3463415168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f34641c1f80 RCX: 00007f34640a2389 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000006 RBP: 00007f34640ed493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffe38449ecf R14: 00007f3463415300 R15: 0000000000022000 Allocated by task 20116: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0x7f/0x90 mm/kasan/common.c:383 kmalloc include/linux/slab.h:580 [inline] kzalloc include/linux/slab.h:720 [inline] nfc_llcp_register_device+0x49/0xa40 net/nfc/llcp_core.c:1567 nfc_register_device+0x61/0x260 net/nfc/core.c:1124 nci_register_device+0x776/0xb20 net/nfc/nci/core.c:1257 virtual_ncidev_open+0x147/0x230 drivers/nfc/virtual_ncidev.c:148 misc_open+0x379/0x4a0 drivers/char/misc.c:165 chrdev_open+0x26c/0x780 fs/char_dev.c:414 do_dentry_open+0x6c4/0x12a0 fs/open.c:920 do_open fs/namei.c:3560 [inline] path_openat+0x24fe/0x37e0 fs/namei.c:3715 do_filp_open+0x1ba/0x410 fs/namei.c:3742 do_sys_openat2+0x171/0x4c0 fs/open.c:1356 do_sys_open fs/open.c:1372 [inline] __do_sys_openat fs/open.c:1388 [inline] __se_sys_openat fs/open.c:1383 [inline] __x64_sys_openat+0x143/0x200 fs/open.c:1383 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 20115: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x50 mm/kasan/generic.c:521 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free mm/kasan/common.c:200 [inline] __kasan_slab_free+0x10a/0x190 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:162 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook mm/slub.c:1807 [inline] slab_free mm/slub.c:3787 [inline] __kmem_cache_free+0x7a/0x190 mm/slub.c:3800 local_release net/nfc/llcp_core.c:174 [inline] kref_put include/linux/kref.h:65 [inline] nfc_llcp_local_put net/nfc/llcp_core.c:182 [inline] nfc_llcp_local_put net/nfc/llcp_core.c:177 [inline] nfc_llcp_unregister_device+0x206/0x290 net/nfc/llcp_core.c:1620 nfc_unregister_device+0x160/0x1d0 net/nfc/core.c:1179 virtual_ncidev_close+0x52/0xa0 drivers/nfc/virtual_ncidev.c:163 __fput+0x252/0xa20 fs/file_table.c:321 task_work_run+0x174/0x270 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x108/0x110 kernel/entry/common.c:204 __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] syscall_exit_to_user_mode+0x21/0x50 kernel/entry/common.c:297 do_syscall_64+0x4c/0x90 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x72/0xdc Last potentially related work creation: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 __kasan_record_aux_stack+0x95/0xb0 mm/kasan/generic.c:491 kvfree_call_rcu+0x29/0xa80 kernel/rcu/tree.c:3328 drop_sysctl_table+0x3be/0x4e0 fs/proc/proc_sysctl.c:1735 unregister_sysctl_table.part.0+0x9c/0x190 fs/proc/proc_sysctl.c:1773 unregister_sysctl_table+0x24/0x30 fs/proc/proc_sysctl.c:1753 neigh_sysctl_unregister+0x5f/0x80 net/core/neighbour.c:3895 addrconf_notify+0x140/0x17b0 net/ipv6/addrconf.c:3684 notifier_call_chain+0xbe/0x210 kernel/notifier.c:87 call_netdevice_notifiers_info+0xb5/0x150 net/core/dev.c:1937 call_netdevice_notifiers_extack net/core/dev.c:1975 [inline] call_netdevice_notifiers net/core/dev.c:1989 [inline] dev_change_name+0x3c3/0x870 net/core/dev.c:1211 dev_ifsioc+0x800/0xf70 net/core/dev_ioctl.c:376 dev_ioctl+0x3d9/0xf80 net/core/dev_ioctl.c:542 sock_do_ioctl+0x160/0x260 net/socket.c:1213 sock_ioctl+0x3f9/0x670 net/socket.c:1316 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x19e/0x210 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc The buggy address belongs to the object at ffff888105b0e400 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 16 bytes inside of freed 1024-byte region [ffff888105b0e400, ffff888105b0e800) The buggy address belongs to the physical page: head:ffffea000416c200 order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0x200000000010200(slab|head|node=0|zone=2) raw: 0200000000010200 ffff8881000430c0 ffffea00044c7010 ffffea0004510e10 raw: 0000000000000000 00000000000a000a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888105b0e300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff888105b0e380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888105b0e400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888105b0e480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888105b0e500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb In summary, this patch solves those use-after-free by 1. Re-implement the nfc_llcp_find_local(). The current version does not grab the reference when getting the local from the linked list. For example, the llcp_sock_bind() gets the reference like below: // llcp_sock_bind() local = nfc_llcp_find_local(dev); // A ..... \ | raceable ..... / llcp_sock->local = nfc_llcp_local_get(local); // B There is an apparent race window that one can drop the reference and free the local object fetched in (A) before (B) gets the reference. 2. Some callers of the nfc_llcp_find_local() do not grab the reference at all. For example, the nfc_genl_llc_{{get/set}_params/sdreq} functions. We add the nfc_llcp_local_put() for them. Moreover, we add the necessary error handling function to put the reference. 3. Add the nfc_llcp_remove_local() helper. The local object is removed from the linked list in local_release() when all reference is gone. This patch removes it when nfc_llcp_unregister_device() is called. Therefore, every caller of nfc_llcp_find_local() will get a reference even when the nfc_llcp_unregister_device() is called. This promises no use-after-free for the local object is ever possible. Fixes: 52feb444a903 ("NFC: Extend netlink interface for LTO, RW, and MIUX parameters support") Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when creating a socket") Signed-off-by: Lin Ma Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/nfc/llcp.h | 1 - net/nfc/llcp_commands.c | 12 +++++++--- net/nfc/llcp_core.c | 51 ++++++++++++++++++++++++++++++++++------- net/nfc/llcp_sock.c | 18 ++++++++------- net/nfc/netlink.c | 20 ++++++++++++---- net/nfc/nfc.h | 1 + 6 files changed, 78 insertions(+), 25 deletions(-) diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index d49d4bf2e37c..a81893bc06ce 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -202,7 +202,6 @@ void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s); void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s); void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); -struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local); int nfc_llcp_local_put(struct nfc_llcp_local *local); u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local, struct nfc_llcp_sock *sock); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index bb9f40563ff6..5b8754ae7d3a 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -361,6 +361,7 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) struct sk_buff *skb; struct nfc_llcp_local *local; u16 size = 0; + int err; pr_debug("Sending SYMM\n"); @@ -372,8 +373,10 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; skb = alloc_skb(size, GFP_KERNEL); - if (skb == NULL) - return -ENOMEM; + if (skb == NULL) { + err = -ENOMEM; + goto out; + } skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); @@ -383,8 +386,11 @@ int nfc_llcp_send_symm(struct nfc_dev *dev) nfc_llcp_send_to_raw_sock(local, skb, NFC_DIRECTION_TX); - return nfc_data_exchange(dev, local->target_idx, skb, + err = nfc_data_exchange(dev, local->target_idx, skb, nfc_llcp_recv, local); +out: + nfc_llcp_local_put(local); + return err; } int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index fd43e75abd94..ddfd159f64e1 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -17,6 +17,8 @@ static u8 llcp_magic[3] = {0x46, 0x66, 0x6d}; static LIST_HEAD(llcp_devices); +/* Protects llcp_devices list */ +static DEFINE_SPINLOCK(llcp_devices_lock); static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); @@ -143,7 +145,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device, write_unlock(&local->raw_sockets.lock); } -struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) +static struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local) { kref_get(&local->ref); @@ -171,7 +173,6 @@ static void local_release(struct kref *ref) local = container_of(ref, struct nfc_llcp_local, ref); - list_del(&local->list); local_cleanup(local); kfree(local); } @@ -284,12 +285,33 @@ static void nfc_llcp_sdreq_timer(struct timer_list *t) struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { struct nfc_llcp_local *local; + struct nfc_llcp_local *res = NULL; + spin_lock(&llcp_devices_lock); list_for_each_entry(local, &llcp_devices, list) - if (local->dev == dev) - return local; + if (local->dev == dev) { + res = nfc_llcp_local_get(local); + break; + } + spin_unlock(&llcp_devices_lock); - pr_debug("No device found\n"); + return res; +} + +static struct nfc_llcp_local *nfc_llcp_remove_local(struct nfc_dev *dev) +{ + struct nfc_llcp_local *local, *tmp; + + spin_lock(&llcp_devices_lock); + list_for_each_entry_safe(local, tmp, &llcp_devices, list) + if (local->dev == dev) { + list_del(&local->list); + spin_unlock(&llcp_devices_lock); + return local; + } + spin_unlock(&llcp_devices_lock); + + pr_warn("Shutting down device not found\n"); return NULL; } @@ -610,12 +632,15 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len) *general_bytes_len = local->gb_len; + nfc_llcp_local_put(local); + return local->gb; } int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) { struct nfc_llcp_local *local; + int err; if (gb_len < 3 || gb_len > NFC_MAX_GT_LEN) return -EINVAL; @@ -632,12 +657,16 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len) if (memcmp(local->remote_gb, llcp_magic, 3)) { pr_err("MAC does not support LLCP\n"); - return -EINVAL; + err = -EINVAL; + goto out; } - return nfc_llcp_parse_gb_tlv(local, + err = nfc_llcp_parse_gb_tlv(local, &local->remote_gb[3], local->remote_gb_len - 3); +out: + nfc_llcp_local_put(local); + return err; } static u8 nfc_llcp_dsap(const struct sk_buff *pdu) @@ -1527,6 +1556,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb) __nfc_llcp_recv(local, skb); + nfc_llcp_local_put(local); + return 0; } @@ -1543,6 +1574,8 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev) /* Close and purge all existing sockets */ nfc_llcp_socket_release(local, true, 0); + + nfc_llcp_local_put(local); } void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -1568,6 +1601,8 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, mod_timer(&local->link_timer, jiffies + msecs_to_jiffies(local->remote_lto)); } + + nfc_llcp_local_put(local); } int nfc_llcp_register_device(struct nfc_dev *ndev) @@ -1618,7 +1653,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) void nfc_llcp_unregister_device(struct nfc_dev *dev) { - struct nfc_llcp_local *local = nfc_llcp_find_local(dev); + struct nfc_llcp_local *local = nfc_llcp_remove_local(dev); if (local == NULL) { pr_debug("No such device\n"); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index fdf0856182c6..6e1fba208493 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -99,7 +99,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; llcp_sock->service_name_len = min_t(unsigned int, llcp_addr.service_name_len, @@ -181,7 +181,7 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr, } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->nfc_protocol = llcp_addr.nfc_protocol; nfc_llcp_sock_link(&local->raw_sockets, sk); @@ -698,22 +698,22 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (dev->dep_link_up == false) { ret = -ENOLINK; device_unlock(&dev->dev); - goto put_dev; + goto sock_llcp_put_local; } device_unlock(&dev->dev); if (local->rf_mode == NFC_RF_INITIATOR && addr->target_idx != local->target_idx) { ret = -ENOLINK; - goto put_dev; + goto sock_llcp_put_local; } llcp_sock->dev = dev; - llcp_sock->local = nfc_llcp_local_get(local); + llcp_sock->local = local; llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { ret = -ENOMEM; - goto sock_llcp_put_local; + goto sock_llcp_nullify; } llcp_sock->reserved_ssap = llcp_sock->ssap; @@ -759,11 +759,13 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, sock_llcp_release: nfc_llcp_put_ssap(local, llcp_sock->ssap); -sock_llcp_put_local: - nfc_llcp_local_put(llcp_sock->local); +sock_llcp_nullify: llcp_sock->local = NULL; llcp_sock->dev = NULL; +sock_llcp_put_local: + nfc_llcp_local_put(local); + put_dev: nfc_put_device(dev); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index e0e116865511..1c5b3ce1e8b1 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1039,11 +1039,14 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { rc = -ENOMEM; - goto exit; + goto put_local; } rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); @@ -1105,7 +1108,7 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { if (dev->dep_link_up) { rc = -EINPROGRESS; - goto exit; + goto put_local; } local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); @@ -1117,6 +1120,9 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) local->miux = cpu_to_be16(miux); +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); @@ -1172,7 +1178,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) if (rc != 0) { rc = -EINVAL; - goto exit; + goto put_local; } if (!sdp_attrs[NFC_SDP_ATTR_URI]) @@ -1191,7 +1197,7 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); if (sdreq == NULL) { rc = -ENOMEM; - goto exit; + goto put_local; } tlvs_len += sdreq->tlv_len; @@ -1201,10 +1207,14 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) if (hlist_empty(&sdreq_list)) { rc = -EINVAL; - goto exit; + goto put_local; } rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); + +put_local: + nfc_llcp_local_put(local); + exit: device_unlock(&dev->dev); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index de2ec66d7e83..0b1e6466f4fb 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -52,6 +52,7 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); +int nfc_llcp_local_put(struct nfc_llcp_local *local); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); From 36e07e8acfb9d4aacd6abf92a5e4c65b789f613a Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Mon, 19 Jun 2023 20:06:57 +0100 Subject: [PATCH 088/509] lib/ts_bm: reset initial match offset for every block of text [ Upstream commit 6f67fbf8192da80c4db01a1800c7fceaca9cf1f9 ] The `shift` variable which indicates the offset in the string at which to start matching the pattern is initialized to `bm->patlen - 1`, but it is not reset when a new block is retrieved. This means the implemen- tation may start looking at later and later positions in each successive block and miss occurrences of the pattern at the beginning. E.g., consider a HTTP packet held in a non-linear skb, where the HTTP request line occurs in the second block: [... 52 bytes of packet headers ...] GET /bmtest HTTP/1.1\r\nHost: www.example.com\r\n\r\n and the pattern is "GET /bmtest". Once the first block comprising the packet headers has been examined, `shift` will be pointing to somewhere near the end of the block, and so when the second block is examined the request line at the beginning will be missed. Reinitialize the variable for each new block. Fixes: 8082e4ed0a61 ("[LIB]: Boyer-Moore extension for textsearch infrastructure strike #2") Link: https://bugzilla.netfilter.org/show_bug.cgi?id=1390 Signed-off-by: Jeremy Sowden Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- lib/ts_bm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/ts_bm.c b/lib/ts_bm.c index 4cf250031f0f..352ae837e031 100644 --- a/lib/ts_bm.c +++ b/lib/ts_bm.c @@ -60,10 +60,12 @@ static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) struct ts_bm *bm = ts_config_priv(conf); unsigned int i, text_len, consumed = state->offset; const u8 *text; - int shift = bm->patlen - 1, bs; + int bs; const u8 icase = conf->flags & TS_IGNORECASE; for (;;) { + int shift = bm->patlen - 1; + text_len = conf->get_next_block(consumed, &text, conf, state); if (unlikely(text_len == 0)) From 9bdcda7abaf22f6453e5b5efb7eb4e524095d5d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 21 Jun 2023 17:56:53 +0200 Subject: [PATCH 089/509] netfilter: conntrack: dccp: copy entire header to stack buffer, not just basic one [ Upstream commit ff0a3a7d52ff7282dbd183e7fc29a1fe386b0c30 ] Eric Dumazet says: nf_conntrack_dccp_packet() has an unique: dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); And nothing more is 'pulled' from the packet, depending on the content. dh->dccph_doff, and/or dh->dccph_x ...) So dccp_ack_seq() is happily reading stuff past the _dh buffer. BUG: KASAN: stack-out-of-bounds in nf_conntrack_dccp_packet+0x1134/0x11c0 Read of size 4 at addr ffff000128f66e0c by task syz-executor.2/29371 [..] Fix this by increasing the stack buffer to also include room for the extra sequence numbers and all the known dccp packet type headers, then pull again after the initial validation of the basic header. While at it, mark packets invalid that lack 48bit sequence bit but where RFC says the type MUST use them. Compile tested only. v2: first skb_header_pointer() now needs to adjust the size to only pull the generic header. (Eric) Heads-up: I intend to remove dccp conntrack support later this year. Fixes: 2bc780499aa3 ("[NETFILTER]: nf_conntrack: add DCCP protocol support") Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_proto_dccp.c | 52 +++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 94001eb51ffe..a9ae292e932a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -431,9 +431,19 @@ static bool dccp_error(const struct dccp_hdr *dh, struct sk_buff *skb, unsigned int dataoff, const struct nf_hook_state *state) { + static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST | + 1 << DCCP_PKT_RESPONSE | + 1 << DCCP_PKT_CLOSEREQ | + 1 << DCCP_PKT_CLOSE | + 1 << DCCP_PKT_RESET | + 1 << DCCP_PKT_SYNC | + 1 << DCCP_PKT_SYNCACK; unsigned int dccp_len = skb->len - dataoff; unsigned int cscov; const char *msg; + u8 type; + + BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG); if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) || dh->dccph_doff * 4 > dccp_len) { @@ -458,10 +468,17 @@ static bool dccp_error(const struct dccp_hdr *dh, goto out_invalid; } - if (dh->dccph_type >= DCCP_PKT_INVALID) { + type = dh->dccph_type; + if (type >= DCCP_PKT_INVALID) { msg = "nf_ct_dccp: reserved packet type "; goto out_invalid; } + + if (test_bit(type, &require_seq48) && !dh->dccph_x) { + msg = "nf_ct_dccp: type lacks 48bit sequence numbers"; + goto out_invalid; + } + return false; out_invalid: nf_l4proto_log_invalid(skb, state->net, state->pf, @@ -469,24 +486,53 @@ static bool dccp_error(const struct dccp_hdr *dh, return true; } +struct nf_conntrack_dccp_buf { + struct dccp_hdr dh; /* generic header part */ + struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */ + union { /* depends on header type */ + struct dccp_hdr_ack_bits ack; + struct dccp_hdr_request req; + struct dccp_hdr_response response; + struct dccp_hdr_reset rst; + } u; +}; + +static struct dccp_hdr * +dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh, + struct nf_conntrack_dccp_buf *buf) +{ + unsigned int hdrlen = __dccp_hdr_len(dh); + + if (hdrlen > sizeof(*buf)) + return NULL; + + return skb_header_pointer(skb, offset, hdrlen, buf); +} + int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, const struct nf_hook_state *state) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - struct dccp_hdr _dh, *dh; + struct nf_conntrack_dccp_buf _dh; u_int8_t type, old_state, new_state; enum ct_dccp_roles role; unsigned int *timeouts; + struct dccp_hdr *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); + dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh); if (!dh) return NF_DROP; if (dccp_error(dh, skb, dataoff, state)) return -NF_ACCEPT; + /* pull again, including possible 48 bit sequences and subtype header */ + dh = dccp_header_pointer(skb, dataoff, dh, &_dh); + if (!dh) + return NF_DROP; + type = dh->dccph_type; if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh)) return -NF_ACCEPT; From 5215c009683903fde66eb11d29026f15f4d53332 Mon Sep 17 00:00:00 2001 From: "Ilia.Gavrilov" Date: Fri, 23 Jun 2023 11:23:46 +0000 Subject: [PATCH 090/509] netfilter: nf_conntrack_sip: fix the ct_sip_parse_numerical_param() return value. [ Upstream commit f188d30087480eab421cd8ca552fb15f55d57f4d ] ct_sip_parse_numerical_param() returns only 0 or 1 now. But process_register_request() and process_register_response() imply checking for a negative value if parsing of a numerical header parameter failed. The invocation in nf_nat_sip() looks correct: if (ct_sip_parse_numerical_param(...) > 0 && ...) { ... } Make the return value of the function ct_sip_parse_numerical_param() a tristate to fix all the cases a) return 1 if value is found; *val is set b) return 0 if value is not found; *val is unchanged c) return -1 on error; *val is undefined Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 0f32a40fc91a ("[NETFILTER]: nf_conntrack_sip: create signalling expectations") Signed-off-by: Ilia.Gavrilov Reviewed-by: Simon Horman Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_sip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 78fd9122b70c..751df19fe0f8 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -611,7 +611,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, start += strlen(name); *val = simple_strtoul(start, &end, 0); if (start == end) - return 0; + return -1; if (matchoff && matchlen) { *matchoff = start - dptr; *matchlen = end - start; From d10b38036977310e1a7c763e9e4f99bd6fa3c1bb Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Mon, 26 Jun 2023 17:33:47 +0800 Subject: [PATCH 091/509] ipvlan: Fix return value of ipvlan_queue_xmit() [ Upstream commit 8a9922e7be6d042fa00f894c376473b17a162b66 ] ipvlan_queue_xmit() should return NET_XMIT_XXX, but ipvlan_xmit_mode_l2/l3() returns rx_handler_result_t or NET_RX_XXX in some cases. ipvlan_rcv_frame() will only return RX_HANDLER_CONSUMED in ipvlan_xmit_mode_l2/l3() because 'local' is true. It's equal to NET_XMIT_SUCCESS. But dev_forward_skb() can return NET_RX_SUCCESS or NET_RX_DROP, and returning NET_RX_DROP(NET_XMIT_DROP) will increase both ipvlan and ipvlan->phy_dev drops counter. The skb to forward can be treated as xmitted successfully. This patch makes ipvlan_queue_xmit() return NET_XMIT_SUCCESS for forward skb. Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Cambda Zhu Link: https://lore.kernel.org/r/20230626093347.7492-1-cambda@linux.alibaba.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ipvlan/ipvlan_core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 0a5b5ff597c6..ab09d110760e 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -586,7 +586,8 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); return NET_XMIT_DROP; } - return ipvlan_rcv_frame(addr, &skb, true); + ipvlan_rcv_frame(addr, &skb, true); + return NET_XMIT_SUCCESS; } } out: @@ -612,7 +613,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); return NET_XMIT_DROP; } - return ipvlan_rcv_frame(addr, &skb, true); + ipvlan_rcv_frame(addr, &skb, true); + return NET_XMIT_SUCCESS; } } skb = skb_share_check(skb, GFP_ATOMIC); @@ -624,7 +626,8 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) * the skb for the main-dev. At the RX side we just return * RX_PASS for it to be processed further on the stack. */ - return dev_forward_skb(ipvlan->phy_dev, skb); + dev_forward_skb(ipvlan->phy_dev, skb); + return NET_XMIT_SUCCESS; } else if (is_multicast_ether_addr(eth->h_dest)) { skb_reset_mac_header(skb); From 472a615e66b9b4c7d631f8d493553f035c9d96de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 26 Jun 2023 09:43:13 -0700 Subject: [PATCH 092/509] netlink: Add __sock_i_ino() for __netlink_diag_dump(). [ Upstream commit 25a9c8a4431c364f97f75558cb346d2ad3f53fbb ] syzbot reported a warning in __local_bh_enable_ip(). [0] Commit 8d61f926d420 ("netlink: fix potential deadlock in netlink_set_err()") converted read_lock(&nl_table_lock) to read_lock_irqsave() in __netlink_diag_dump() to prevent a deadlock. However, __netlink_diag_dump() calls sock_i_ino() that uses read_lock_bh() and read_unlock_bh(). If CONFIG_TRACE_IRQFLAGS=y, read_unlock_bh() finally enables IRQ even though it should stay disabled until the following read_unlock_irqrestore(). Using read_lock() in sock_i_ino() would trigger a lockdep splat in another place that was fixed in commit f064af1e500a ("net: fix a lockdep splat"), so let's add __sock_i_ino() that would be safe to use under BH disabled. [0]: WARNING: CPU: 0 PID: 5012 at kernel/softirq.c:376 __local_bh_enable_ip+0xbe/0x130 kernel/softirq.c:376 Modules linked in: CPU: 0 PID: 5012 Comm: syz-executor487 Not tainted 6.4.0-rc7-syzkaller-00202-g6f68fc395f49 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 RIP: 0010:__local_bh_enable_ip+0xbe/0x130 kernel/softirq.c:376 Code: 45 bf 01 00 00 00 e8 91 5b 0a 00 e8 3c 15 3d 00 fb 65 8b 05 ec e9 b5 7e 85 c0 74 58 5b 5d c3 65 8b 05 b2 b6 b4 7e 85 c0 75 a2 <0f> 0b eb 9e e8 89 15 3d 00 eb 9f 48 89 ef e8 6f 49 18 00 eb a8 0f RSP: 0018:ffffc90003a1f3d0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000201 RCX: 1ffffffff1cf5996 RDX: 0000000000000000 RSI: 0000000000000201 RDI: ffffffff8805c6f3 RBP: ffffffff8805c6f3 R08: 0000000000000001 R09: ffff8880152b03a3 R10: ffffed1002a56074 R11: 0000000000000005 R12: 00000000000073e4 R13: dffffc0000000000 R14: 0000000000000002 R15: 0000000000000000 FS: 0000555556726300(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 000000007c646000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sock_i_ino+0x83/0xa0 net/core/sock.c:2559 __netlink_diag_dump+0x45c/0x790 net/netlink/diag.c:171 netlink_diag_dump+0xd6/0x230 net/netlink/diag.c:207 netlink_dump+0x570/0xc50 net/netlink/af_netlink.c:2269 __netlink_dump_start+0x64b/0x910 net/netlink/af_netlink.c:2374 netlink_dump_start include/linux/netlink.h:329 [inline] netlink_diag_handler_dump+0x1ae/0x250 net/netlink/diag.c:238 __sock_diag_cmd net/core/sock_diag.c:238 [inline] sock_diag_rcv_msg+0x31e/0x440 net/core/sock_diag.c:269 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2547 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x925/0xe30 net/netlink/af_netlink.c:1914 sock_sendmsg_nosec net/socket.c:724 [inline] sock_sendmsg+0xde/0x190 net/socket.c:747 ____sys_sendmsg+0x71c/0x900 net/socket.c:2503 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2557 __sys_sendmsg+0xf7/0x1c0 net/socket.c:2586 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5303aaabb9 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffc7506e548 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5303aaabb9 RDX: 0000000000000000 RSI: 0000000020000180 RDI: 0000000000000003 RBP: 00007f5303a6ed60 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f5303a6edf0 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 8d61f926d420 ("netlink: fix potential deadlock in netlink_set_err()") Reported-by: syzbot+5da61cf6a9bc1902d422@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=5da61cf6a9bc1902d422 Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230626164313.52528-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/sock.h | 1 + net/core/sock.c | 17 ++++++++++++++--- net/netlink/diag.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 03e7f7581559..1fb5c535537c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1934,6 +1934,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) } kuid_t sock_i_uid(struct sock *sk); +unsigned long __sock_i_ino(struct sock *sk); unsigned long sock_i_ino(struct sock *sk); static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) diff --git a/net/core/sock.c b/net/core/sock.c index 9b013d052a72..4e00c6e2cb43 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2175,13 +2175,24 @@ kuid_t sock_i_uid(struct sock *sk) } EXPORT_SYMBOL(sock_i_uid); +unsigned long __sock_i_ino(struct sock *sk) +{ + unsigned long ino; + + read_lock(&sk->sk_callback_lock); + ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; + read_unlock(&sk->sk_callback_lock); + return ino; +} +EXPORT_SYMBOL(__sock_i_ino); + unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; - read_lock_bh(&sk->sk_callback_lock); - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock_bh(&sk->sk_callback_lock); + local_bh_disable(); + ino = __sock_i_ino(sk); + local_bh_enable(); return ino; } EXPORT_SYMBOL(sock_i_ino); diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 4143b2ea4195..e4f21b1067bc 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - sock_i_ino(sk)) < 0) { + __sock_i_ino(sk)) < 0) { ret = 1; break; } From b2213fc60b83dcd7b15c39bffd22ebd201f50e15 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 13 Apr 2023 08:12:28 -0700 Subject: [PATCH 093/509] radeon: avoid double free in ci_dpm_init() [ Upstream commit 20c3dffdccbd494e0dd631d1660aeecbff6775f2 ] Several calls to ci_dpm_fini() will attempt to free resources that either have been freed before or haven't been allocated yet. This may lead to undefined or dangerous behaviour. For instance, if r600_parse_extended_power_table() fails, it might call r600_free_extended_power_table() as will ci_dpm_fini() later during error handling. Fix this by only freeing pointers to objects previously allocated. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)") Co-developed-by: Natalia Petrova Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/ci_dpm.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 886e9959496f..f98df826972c 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5541,6 +5541,7 @@ static int ci_parse_power_table(struct radeon_device *rdev) u8 frev, crev; u8 *power_state_offset; struct ci_ps *ps; + int ret; if (!atom_parse_data_header(mode_info->atom_context, index, NULL, &frev, &crev, &data_offset)) @@ -5570,11 +5571,15 @@ static int ci_parse_power_table(struct radeon_device *rdev) non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; - if (!rdev->pm.power_state[i].clock_info) - return -EINVAL; + if (!rdev->pm.power_state[i].clock_info) { + ret = -EINVAL; + goto err_free_ps; + } ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL); - if (ps == NULL) - return -ENOMEM; + if (ps == NULL) { + ret = -ENOMEM; + goto err_free_ps; + } rdev->pm.dpm.ps[i].ps_priv = ps; ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i], non_clock_info, @@ -5614,6 +5619,12 @@ static int ci_parse_power_table(struct radeon_device *rdev) } return 0; + +err_free_ps: + for (i = 0; i < rdev->pm.dpm.num_ps; i++) + kfree(rdev->pm.dpm.ps[i].ps_priv); + kfree(rdev->pm.dpm.ps); + return ret; } static int ci_get_vbios_boot_values(struct radeon_device *rdev, @@ -5702,25 +5713,26 @@ int ci_dpm_init(struct radeon_device *rdev) ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state); if (ret) { - ci_dpm_fini(rdev); + kfree(rdev->pm.dpm.priv); return ret; } ret = r600_get_platform_caps(rdev); if (ret) { - ci_dpm_fini(rdev); + kfree(rdev->pm.dpm.priv); return ret; } ret = r600_parse_extended_power_table(rdev); if (ret) { - ci_dpm_fini(rdev); + kfree(rdev->pm.dpm.priv); return ret; } ret = ci_parse_power_table(rdev); if (ret) { - ci_dpm_fini(rdev); + kfree(rdev->pm.dpm.priv); + r600_free_extended_power_table(rdev); return ret; } From 2780d5844855c7d5e245d273dac272ba1c00d86b Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 2 May 2019 13:21:48 -0400 Subject: [PATCH 094/509] drm/amd/display: Explicitly specify update type per plane info change [ Upstream commit 710cc1e7cd461446a9325c9bd1e9a54daa462952 ] [Why] The bit for flip addr is being set causing the determination for FAST vs MEDIUM to always return MEDIUM when plane info is provided as a surface update. This causes extreme stuttering for the typical atomic update path on Linux. [How] Don't use update_flags->raw for determining FAST vs MEDIUM. It's too fragile to changes like this. Explicitly specify the update type per update flag instead. It's not as clever as checking the bits itself but at least it's correct. Fixes: aa5fdb1ab5b6 ("drm/amd/display: Explicitly specify update type per plane info change") Reviewed-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7e0a55aa2b18..099542dd3154 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1855,9 +1855,6 @@ static enum surface_update_type det_surface_update(const struct dc *dc, enum surface_update_type overall_type = UPDATE_TYPE_FAST; union surface_update_flags *update_flags = &u->surface->update_flags; - if (u->flip_addr) - update_flags->bits.addr_update = 1; - if (!is_surface_in_context(context, u->surface) || u->surface->force_full_update) { update_flags->raw = 0xFFFFFFFF; return UPDATE_TYPE_FULL; From 4e0fd4f54beaeb92fe697cf2d1146a7105c1f84c Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Mon, 1 May 2023 17:01:45 -0700 Subject: [PATCH 095/509] Input: drv260x - sleep between polling GO bit [ Upstream commit efef661dfa6bf8cbafe4cd6a97433fcef0118967 ] When doing the initial startup there's no need to poll without any delay and spam the I2C bus. Let's sleep 15ms between each attempt, which is the same time as used in the vendor driver. Fixes: 7132fe4f5687 ("Input: drv260x - add TI drv260x haptics driver") Signed-off-by: Luca Weiss Link: https://lore.kernel.org/r/20230430-drv260x-improvements-v1-2-1fb28b4cc698@z3ntu.xyz Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/drv260x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index 79d7fa710a71..54002d1a446b 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -435,6 +435,7 @@ static int drv260x_init(struct drv260x_data *haptics) } do { + usleep_range(15000, 15500); error = regmap_read(haptics->regmap, DRV260X_GO, &cal_buf); if (error) { dev_err(&haptics->client->dev, From 1b4f23fdf27f537489a823e0b842de7d1caeab6d Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:26 +0200 Subject: [PATCH 096/509] drm/bridge: tc358768: always enable HS video mode [ Upstream commit 75a8aeac2573ab258c53676eba9b3796ea691988 ] Always enable HS video mode setting the TXMD bit, without this change no video output is present with DSI sinks that are setting MIPI_DSI_MODE_LPM flag (tested with LT8912B DSI-HDMI bridge). Previously the driver was enabling HS mode only when the DSI sink was not explicitly setting the MIPI_DSI_MODE_LPM, however this is not correct. The MIPI_DSI_MODE_LPM is supposed to indicate that the sink is willing to receive data in low power mode, however clearing the TC358768_DSI_CONTROL_TXMD bit will make the TC358768 send video in LP mode that is not the intended behavior. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-2-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index 8ed8302d6bbb..e65af025a771 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -819,8 +819,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) val = TC358768_DSI_CONFW_MODE_SET | TC358768_DSI_CONFW_ADDR_DSI_CONTROL; val |= (dsi_dev->lanes - 1) << 1; - if (!(dsi_dev->mode_flags & MIPI_DSI_MODE_LPM)) - val |= TC358768_DSI_CONTROL_TXMD; + val |= TC358768_DSI_CONTROL_TXMD; if (!(dsi_dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) val |= TC358768_DSI_CONTROL_HSCKMD; From 825b00c685893bddcf7b261d47927d94d589f476 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:27 +0200 Subject: [PATCH 097/509] drm/bridge: tc358768: fix PLL parameters computation [ Upstream commit 6a4020b4c63911977aaf8047f904a300d15de739 ] According to Toshiba documentation the PLL input clock after the divider should be not less than 4MHz, fix the PLL parameters computation accordingly. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-3-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index e65af025a771..d9021e750940 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -329,13 +329,17 @@ static int tc358768_calc_pll(struct tc358768_priv *priv, u32 fbd; for (fbd = 0; fbd < 512; ++fbd) { - u32 pll, diff; + u32 pll, diff, pll_in; pll = (u32)div_u64((u64)refclk * (fbd + 1), divisor); if (pll >= max_pll || pll < min_pll) continue; + pll_in = (u32)div_u64((u64)refclk, prd + 1); + if (pll_in < 4000000) + continue; + diff = max(pll, target_pll) - min(pll, target_pll); if (diff < best_diff) { From 46b74171898981a308cabe718b622ea1cc132f64 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:28 +0200 Subject: [PATCH 098/509] drm/bridge: tc358768: fix PLL target frequency [ Upstream commit ffd2e4bbea626d565b9817312b0fcfb382fecb88 ] Correctly compute the PLL target frequency, the current formula works correctly only when the input bus width is 24bit, actually to properly compute the PLL target frequency what is relevant is the bits-per-pixel on the DSI link. No regression expected since the DSI format is currently hard-coded as MIPI_DSI_FMT_RGB888. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-4-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index d9021e750940..4aec4b428189 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -147,6 +147,7 @@ struct tc358768_priv { u32 pd_lines; /* number of Parallel Port Input Data Lines */ u32 dsi_lanes; /* number of DSI Lanes */ + u32 dsi_bpp; /* number of Bits Per Pixel over DSI */ /* Parameters for PLL programming */ u32 fbd; /* PLL feedback divider */ @@ -279,12 +280,12 @@ static void tc358768_hw_disable(struct tc358768_priv *priv) static u32 tc358768_pll_to_pclk(struct tc358768_priv *priv, u32 pll_clk) { - return (u32)div_u64((u64)pll_clk * priv->dsi_lanes, priv->pd_lines); + return (u32)div_u64((u64)pll_clk * priv->dsi_lanes, priv->dsi_bpp); } static u32 tc358768_pclk_to_pll(struct tc358768_priv *priv, u32 pclk) { - return (u32)div_u64((u64)pclk * priv->pd_lines, priv->dsi_lanes); + return (u32)div_u64((u64)pclk * priv->dsi_bpp, priv->dsi_lanes); } static int tc358768_calc_pll(struct tc358768_priv *priv, @@ -421,6 +422,7 @@ static int tc358768_dsi_host_attach(struct mipi_dsi_host *host, priv->output.panel = panel; priv->dsi_lanes = dev->lanes; + priv->dsi_bpp = mipi_dsi_pixel_format_to_bpp(dev->format); /* get input ep (port0/endpoint0) */ ret = -EINVAL; @@ -432,7 +434,7 @@ static int tc358768_dsi_host_attach(struct mipi_dsi_host *host, } if (ret) - priv->pd_lines = mipi_dsi_pixel_format_to_bpp(dev->format); + priv->pd_lines = priv->dsi_bpp; drm_bridge_add(&priv->bridge); From 43b2d11ccffbce890d7a190498947e8c0b541a23 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:29 +0200 Subject: [PATCH 099/509] drm/bridge: tc358768: fix TCLK_ZEROCNT computation [ Upstream commit f9cf811374f42fca31ac34aaf59ee2ae72b89879 ] Correct computation of TCLK_ZEROCNT register. This register must be set to a value that ensure that (TCLK-PREPARECNT + TCLK-ZERO) > 300ns with the actual value of (TCLK-PREPARECNT + TCLK-ZERO) being (1 to 2) + (TCLK_ZEROCNT + 1)) x HSByteClkCycle + (PHY output delay) with PHY output delay being about (2 to 3) x MIPIBitClk cycle in the BitClk conversion. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-5-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index 4aec4b428189..facd4dab433b 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -725,10 +725,10 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) /* 38ns < TCLK_PREPARE < 95ns */ val = tc358768_ns_to_cnt(65, dsibclk_nsk) - 1; - /* TCLK_PREPARE > 300ns */ - val2 = tc358768_ns_to_cnt(300 + tc358768_to_ns(3 * ui_nsk), - dsibclk_nsk); - val |= (val2 - tc358768_to_ns(phy_delay_nsk - dsibclk_nsk)) << 8; + /* TCLK_PREPARE + TCLK_ZERO > 300ns */ + val2 = tc358768_ns_to_cnt(300 - tc358768_to_ns(2 * ui_nsk), + dsibclk_nsk) - 2; + val |= val2 << 8; dev_dbg(priv->dev, "TCLK_HEADERCNT: 0x%x\n", val); tc358768_write(priv, TC358768_TCLK_HEADERCNT, val); From 33abcfbb17b0a7b014161cf3a270d98a1cfa5eb7 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 30 Mar 2023 11:59:41 +0200 Subject: [PATCH 100/509] drm/bridge: tc358768: Add atomic_get_input_bus_fmts() implementation [ Upstream commit cec5ccef85bd0128cf895612de54a9d21d2015d0 ] Add atomic_get_input_bus_fmts() implementation, tc358768 has a parallel RGB input interface with the actual bus format depending on the amount of parallel input data lines. Without this change when the tc358768 is used with less than 24bit the color mapping is completely wrong. Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230330095941.428122-7-francesco@dolcini.it Stable-dep-of: ee18698e212b ("drm/bridge: tc358768: fix TCLK_TRAILCNT computation") Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index facd4dab433b..f6c0300090ec 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -871,6 +872,44 @@ static void tc358768_bridge_enable(struct drm_bridge *bridge) } } +#define MAX_INPUT_SEL_FORMATS 1 + +static u32 * +tc358768_atomic_get_input_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts) +{ + struct tc358768_priv *priv = bridge_to_tc358768(bridge); + u32 *input_fmts; + + *num_input_fmts = 0; + + input_fmts = kcalloc(MAX_INPUT_SEL_FORMATS, sizeof(*input_fmts), + GFP_KERNEL); + if (!input_fmts) + return NULL; + + switch (priv->pd_lines) { + case 16: + input_fmts[0] = MEDIA_BUS_FMT_RGB565_1X16; + break; + case 18: + input_fmts[0] = MEDIA_BUS_FMT_RGB666_1X18; + break; + default: + case 24: + input_fmts[0] = MEDIA_BUS_FMT_RGB888_1X24; + break; + }; + + *num_input_fmts = MAX_INPUT_SEL_FORMATS; + + return input_fmts; +} + static const struct drm_bridge_funcs tc358768_bridge_funcs = { .attach = tc358768_bridge_attach, .mode_valid = tc358768_bridge_mode_valid, @@ -878,6 +917,11 @@ static const struct drm_bridge_funcs tc358768_bridge_funcs = { .enable = tc358768_bridge_enable, .disable = tc358768_bridge_disable, .post_disable = tc358768_bridge_post_disable, + + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_get_input_bus_fmts = tc358768_atomic_get_input_bus_fmts, }; static const struct drm_bridge_timings default_tc358768_timings = { From 6b9450723babe5af24cf16574f21052a5b458f90 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:30 +0200 Subject: [PATCH 101/509] drm/bridge: tc358768: fix TCLK_TRAILCNT computation [ Upstream commit ee18698e212b1659dd0850d7e2ae0f22e16ed3d3 ] Correct computation of TCLK_TRAILCNT register. The driver does not implement non-continuous clock mode, so the actual value doesn't make a practical difference yet. However this change also ensures that the value does not write to reserved registers bits in case of under/overflow. This register must be set to a value that ensures that TCLK-TRAIL > 60ns and TEOT <= (105 ns + 12 x UI) with the actual value of TCLK-TRAIL being (TCLK_TRAILCNT + (1 to 2)) xHSByteClkCycle + (2 + (1 to 2)) * HSBYTECLKCycle - (PHY output delay) with PHY output delay being about (2 to 3) x MIPIBitClk cycle in the BitClk conversion. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-2-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-3-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-4-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-5-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-2-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-3-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-4-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-5-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-2-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-3-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-4-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-5-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-2-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-3-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-4-francesco@dolcini.it Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-5-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index f6c0300090ec..b7372c5b0b81 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -633,6 +634,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) struct tc358768_priv *priv = bridge_to_tc358768(bridge); struct mipi_dsi_device *dsi_dev = priv->output.dev; u32 val, val2, lptxcnt, hact, data_type; + s32 raw_val; const struct drm_display_mode *mode; u32 dsibclk_nsk, dsiclk_nsk, ui_nsk, phy_delay_nsk; u32 dsiclk, dsibclk; @@ -733,9 +735,9 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) dev_dbg(priv->dev, "TCLK_HEADERCNT: 0x%x\n", val); tc358768_write(priv, TC358768_TCLK_HEADERCNT, val); - /* TCLK_TRAIL > 60ns + 3*UI */ - val = 60 + tc358768_to_ns(3 * ui_nsk); - val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 5; + /* TCLK_TRAIL > 60ns AND TEOT <= 105 ns + 12*UI */ + raw_val = tc358768_ns_to_cnt(60 + tc358768_to_ns(2 * ui_nsk), dsibclk_nsk) - 5; + val = clamp(raw_val, 0, 127); dev_dbg(priv->dev, "TCLK_TRAILCNT: 0x%x\n", val); tc358768_write(priv, TC358768_TCLK_TRAILCNT, val); From 8e47328fe089ab0b125e6da172267dc6b040053a Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:31 +0200 Subject: [PATCH 102/509] drm/bridge: tc358768: fix THS_ZEROCNT computation [ Upstream commit 77a089328da791118af9692543a5eedc79eb5fd4 ] Correct computation of THS_ZEROCNT register. This register must be set to a value that ensure that THS_PREPARE + THS_ZERO > 145ns + 10*UI with the actual value of (THS_PREPARE + THS_ZERO) being ((1 to 2) + 1 + (TCLK_ZEROCNT + 1) + (3 to 4)) x ByteClk cycle + + HSByteClk x (2 + (1 to 2)) + (PHY delay) with PHY delay being about (8 + (5 to 6)) x MIPIBitClk cycle in the BitClk conversion. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-7-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index b7372c5b0b81..a35674b6ff24 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -744,9 +744,10 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) /* 40ns + 4*UI < THS_PREPARE < 85ns + 6*UI */ val = 50 + tc358768_to_ns(4 * ui_nsk); val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 1; - /* THS_ZERO > 145ns + 10*UI */ - val2 = tc358768_ns_to_cnt(145 - tc358768_to_ns(ui_nsk), dsibclk_nsk); - val |= (val2 - tc358768_to_ns(phy_delay_nsk)) << 8; + /* THS_PREPARE + THS_ZERO > 145ns + 10*UI */ + raw_val = tc358768_ns_to_cnt(145 - tc358768_to_ns(3 * ui_nsk), dsibclk_nsk) - 10; + val2 = clamp(raw_val, 0, 127); + val |= val2 << 8; dev_dbg(priv->dev, "THS_HEADERCNT: 0x%x\n", val); tc358768_write(priv, TC358768_THS_HEADERCNT, val); From f2f7d0a4a22a7d8ad24ecf54897255b579245574 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:32 +0200 Subject: [PATCH 103/509] drm/bridge: tc358768: fix TXTAGOCNT computation [ Upstream commit 3666aad8185af8d0ce164fd3c4974235417d6d0b ] Correct computation of TXTAGOCNT register. This register must be set to a value that ensure that the TTA-GO period = (4 x TLPX) with the actual value of TTA-GO being 4 x (TXTAGOCNT + 1) x (HSByteClk cycle) Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-8-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index a35674b6ff24..40fffce680c5 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -779,7 +779,7 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) /* TXTAGOCNT[26:16] RXTASURECNT[10:0] */ val = tc358768_to_ns((lptxcnt + 1) * dsibclk_nsk * 4); - val = tc358768_ns_to_cnt(val, dsibclk_nsk) - 1; + val = tc358768_ns_to_cnt(val, dsibclk_nsk) / 4 - 1; val2 = tc358768_ns_to_cnt(tc358768_to_ns((lptxcnt + 1) * dsibclk_nsk), dsibclk_nsk) - 2; val |= val2 << 16; From 46a34e145955480a19abf609815a82d1ca73e2ea Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 27 Apr 2023 16:29:33 +0200 Subject: [PATCH 104/509] drm/bridge: tc358768: fix THS_TRAILCNT computation [ Upstream commit bac7842cd179572e8e0fc2d7b5254e40c6e9e057 ] Correct computation of THS_TRAILCNT register. This register must be set to a value that ensure that THS_TRAIL > 60 ns + 4 x UI and THS_TRAIL > 8 x UI and THS_TRAIL < TEOT with TEOT = 105 ns + (12 x UI) with the actual value of THS_TRAIL being (1 + THS_TRAILCNT) x ByteClk cycle + ((1 to 2) + 2) xHSBYTECLK cycle + - (PHY output delay) with PHY output delay being about (8 + (5 to 6)) x MIPIBitClk cycle in the BitClk conversion. Fixes: ff1ca6397b1d ("drm/bridge: Add tc358768 driver") Signed-off-by: Francesco Dolcini Reviewed-by: Robert Foss Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20230427142934.55435-9-francesco@dolcini.it Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/tc358768.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index 40fffce680c5..b4a69b210451 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -763,9 +763,10 @@ static void tc358768_bridge_pre_enable(struct drm_bridge *bridge) dev_dbg(priv->dev, "TCLK_POSTCNT: 0x%x\n", val); tc358768_write(priv, TC358768_TCLK_POSTCNT, val); - /* 60ns + 4*UI < THS_PREPARE < 105ns + 12*UI */ - val = tc358768_ns_to_cnt(60 + tc358768_to_ns(15 * ui_nsk), - dsibclk_nsk) - 5; + /* max(60ns + 4*UI, 8*UI) < THS_TRAILCNT < 105ns + 12*UI */ + raw_val = tc358768_ns_to_cnt(60 + tc358768_to_ns(18 * ui_nsk), + dsibclk_nsk) - 4; + val = clamp(raw_val, 0, 15); dev_dbg(priv->dev, "THS_TRAILCNT: 0x%x\n", val); tc358768_write(priv, TC358768_THS_TRAILCNT, val); From 20ecae1af578ca741e6cfd0479f7e8f452745d36 Mon Sep 17 00:00:00 2001 From: Luc Ma Date: Mon, 8 May 2023 08:09:16 +0800 Subject: [PATCH 105/509] drm/vram-helper: fix function names in vram helper doc [ Upstream commit b8e392245105b50706f18418054821e71e637288 ] Refer to drmm_vram_helper_init() instead of the non-existent drmm_vram_helper_alloc_mm(). Fixes: a5f23a72355d ("drm/vram-helper: Managed vram helpers") Signed-off-by: Luc Ma Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/64583db2.630a0220.eb75d.8f51@mx.google.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_gem_vram_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 375c79e23ca5..eb104de16fa7 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -41,7 +41,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs; * the frame's scanout buffer or the cursor image. If there's no more space * left in VRAM, inactive GEM objects can be moved to system memory. * - * To initialize the VRAM helper library call drmm_vram_helper_alloc_mm(). + * To initialize the VRAM helper library call drmm_vram_helper_init(). * The function allocates and initializes an instance of &struct drm_vram_mm * in &struct drm_device.vram_mm . Use &DRM_GEM_VRAM_DRIVER to initialize * &struct drm_driver and &DRM_VRAM_MM_FILE_OPERATIONS to initialize @@ -69,7 +69,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs; * // setup device, vram base and size * // ... * - * ret = drmm_vram_helper_alloc_mm(dev, vram_base, vram_size); + * ret = drmm_vram_helper_init(dev, vram_base, vram_size); * if (ret) * return ret; * return 0; @@ -82,7 +82,7 @@ static const struct drm_gem_object_funcs drm_gem_vram_object_funcs; * to userspace. * * You don't have to clean up the instance of VRAM MM. - * drmm_vram_helper_alloc_mm() is a managed interface that installs a + * drmm_vram_helper_init() is a managed interface that installs a * clean-up handler to run during the DRM device's release. * * For drawing or scanout operations, rsp. buffer objects have to be pinned From bd46ade71497faaf8f11e861cf294b155f3b2047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Wed, 3 May 2023 14:28:30 +0200 Subject: [PATCH 106/509] ARM: dts: BCM5301X: Drop "clock-names" from the SPI node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d3c8e2c5757153bbfad70019ec1decbca86f3def ] There is no such property in the SPI controller binding documentation. Also Linux driver doesn't look for it. This fixes: arch/arm/boot/dts/bcm4708-asus-rt-ac56u.dtb: spi@18029200: Unevaluated properties are not allowed ('clock-names' was unexpected) From schema: Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.yaml Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20230503122830.3200-1-zajec5@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm5301x.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 9fdad20c40d1..4e9bb10f37d0 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -532,7 +532,6 @@ spi@18029200 { "spi_lr_session_done", "spi_lr_overread"; clocks = <&iprocmed>; - clock-names = "iprocmed"; num-cs = <2>; #address-cells = <1>; #size-cells = <0>; From e629efc6d60214cc3fd7987a6c8fb91a1aafc34d Mon Sep 17 00:00:00 2001 From: "hfdevel@gmx.net" Date: Fri, 7 Apr 2023 16:36:25 +0200 Subject: [PATCH 107/509] ARM: dts: meson8b: correct uart_B and uart_C clock references [ Upstream commit d542ce8d4769cdef6a7bc3437e59cfed9c68f0e4 ] With the current device tree for meson8b, uarts B (e.g. available on pins 8/10 on Odroid-C1) and C (pins 3/5 on Odroid-C1) do not work, because they are relying on incorrect clocks. Change the references of pclk to the correct CLKID, to allow use of the two uarts. Fixes: 3375aa77135f ("ARM: dts: meson8b: Fix the UART device-tree schema validation") Signed-off-by: Hans-Frieder Vogt Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/trinity-bf20bcb9-790b-4ab9-99e3-0831ef8257f4-1680878185420@3c-app-gmx-bap55 Signed-off-by: Neil Armstrong Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8b.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index f6eb7c803174..af2454c9f77a 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -599,13 +599,13 @@ &uart_A { &uart_B { compatible = "amlogic,meson8b-uart"; - clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clocks = <&xtal>, <&clkc CLKID_UART1>, <&clkc CLKID_CLK81>; clock-names = "xtal", "pclk", "baud"; }; &uart_C { compatible = "amlogic,meson8b-uart"; - clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clocks = <&xtal>, <&clkc CLKID_UART2>, <&clkc CLKID_CLK81>; clock-names = "xtal", "pclk", "baud"; }; From 39305592dc97b361b0c7f0088c70b30b3babf4a8 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 10 May 2023 17:27:55 -0700 Subject: [PATCH 108/509] Input: adxl34x - do not hardcode interrupt trigger type [ Upstream commit e96220bce5176ed2309f77f061dcc0430b82b25e ] Instead of hardcoding IRQ trigger type to IRQF_TRIGGER_HIGH, let's respect the settings specified in the firmware description. Fixes: e27c729219ad ("Input: add driver for ADXL345/346 Digital Accelerometers") Signed-off-by: Marek Vasut Acked-by: Michael Hennerich Link: https://lore.kernel.org/r/20230509203555.549158-1-marex@denx.de Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/adxl34x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/input/misc/adxl34x.c b/drivers/input/misc/adxl34x.c index 4cc4e8ff42b3..ad035c342cd3 100644 --- a/drivers/input/misc/adxl34x.c +++ b/drivers/input/misc/adxl34x.c @@ -811,8 +811,7 @@ struct adxl34x *adxl34x_probe(struct device *dev, int irq, AC_WRITE(ac, POWER_CTL, 0); err = request_threaded_irq(ac->irq, NULL, adxl34x_irq, - IRQF_TRIGGER_HIGH | IRQF_ONESHOT, - dev_name(dev), ac); + IRQF_ONESHOT, dev_name(dev), ac); if (err) { dev_err(dev, "irq %d busy?\n", ac->irq); goto err_free_mem; From 3c87c98225be2d551f01742eca587f6416c13ec8 Mon Sep 17 00:00:00 2001 From: XuDong Liu Date: Sun, 30 Apr 2023 19:23:46 +0800 Subject: [PATCH 109/509] drm: sun4i_tcon: use devm_clk_get_enabled in `sun4i_tcon_init_clocks` [ Upstream commit 123ee07ba5b7123e0ce0e0f9d64938026c16a2ce ] Smatch reports: drivers/gpu/drm/sun4i/sun4i_tcon.c:805 sun4i_tcon_init_clocks() warn: 'tcon->clk' from clk_prepare_enable() not released on lines: 792,801. In the function sun4i_tcon_init_clocks(), tcon->clk and tcon->sclk0 are not disabled in the error handling, which affects the release of these variable. Although sun4i_tcon_bind(), which calls sun4i_tcon_init_clocks(), use sun4i_tcon_free_clocks to disable the variables mentioned, but the error handling branch of sun4i_tcon_init_clocks() ignores the required disable process. To fix this issue, use the devm_clk_get_enabled to automatically balance enable and disabled calls. As original implementation use sun4i_tcon_free_clocks() to disable clk explicitly, we delete the related calls and error handling that are no longer needed. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Fixes: b14e945bda8a ("drm/sun4i: tcon: Prepare and enable TCON channel 0 clock at init") Fixes: 8e9240472522 ("drm/sun4i: support TCONs without channel 1") Fixes: 34d698f6e349 ("drm/sun4i: Add has_channel_0 TCON quirk") Signed-off-by: XuDong Liu Reviewed-by: Dongliang Mu Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20230430112347.4689-1-m202071377@hust.edu.cn Signed-off-by: Sasha Levin --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 9f06dec0fc61..bb43196d5d83 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -777,21 +777,19 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private) static int sun4i_tcon_init_clocks(struct device *dev, struct sun4i_tcon *tcon) { - tcon->clk = devm_clk_get(dev, "ahb"); + tcon->clk = devm_clk_get_enabled(dev, "ahb"); if (IS_ERR(tcon->clk)) { dev_err(dev, "Couldn't get the TCON bus clock\n"); return PTR_ERR(tcon->clk); } - clk_prepare_enable(tcon->clk); if (tcon->quirks->has_channel_0) { - tcon->sclk0 = devm_clk_get(dev, "tcon-ch0"); + tcon->sclk0 = devm_clk_get_enabled(dev, "tcon-ch0"); if (IS_ERR(tcon->sclk0)) { dev_err(dev, "Couldn't get the TCON channel 0 clock\n"); return PTR_ERR(tcon->sclk0); } } - clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -804,12 +802,6 @@ static int sun4i_tcon_init_clocks(struct device *dev, return 0; } -static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) -{ - clk_disable_unprepare(tcon->sclk0); - clk_disable_unprepare(tcon->clk); -} - static int sun4i_tcon_init_irq(struct device *dev, struct sun4i_tcon *tcon) { @@ -1224,14 +1216,14 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, ret = sun4i_tcon_init_regmap(dev, tcon); if (ret) { dev_err(dev, "Couldn't init our TCON regmap\n"); - goto err_free_clocks; + goto err_assert_reset; } if (tcon->quirks->has_channel_0) { ret = sun4i_dclk_create(dev, tcon); if (ret) { dev_err(dev, "Couldn't create our TCON dot clock\n"); - goto err_free_clocks; + goto err_assert_reset; } } @@ -1294,8 +1286,6 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master, err_free_dotclock: if (tcon->quirks->has_channel_0) sun4i_dclk_free(tcon); -err_free_clocks: - sun4i_tcon_free_clocks(tcon); err_assert_reset: reset_control_assert(tcon->lcd_rst); return ret; @@ -1309,7 +1299,6 @@ static void sun4i_tcon_unbind(struct device *dev, struct device *master, list_del(&tcon->list); if (tcon->quirks->has_channel_0) sun4i_dclk_free(tcon); - sun4i_tcon_free_clocks(tcon); } static const struct component_ops sun4i_tcon_ops = { From 617a4da09d773e59ea74d9039772f6cbc6fec6be Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 7 May 2023 20:26:38 +0300 Subject: [PATCH 110/509] drm/panel: sharp-ls043t1le01: adjust mode settings [ Upstream commit dee23b2c9e3ff46d59c5d45e1436eceb878e7c9a ] Using current settings causes panel flickering on APQ8074 dragonboard. Adjust panel settings to follow the vendor-provided mode. This also enables MIPI_DSI_MODE_VIDEO_SYNC_PULSE, which is also specified by the vendor dtsi for the mentioned dragonboard. Fixes: ee0172383190 ("drm/panel: Add Sharp LS043T1LE01 MIPI DSI panel") Signed-off-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230507172639.2320934-1-dmitry.baryshkov@linaro.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c index 16dbf0f353ed..1f5fb1547730 100644 --- a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c +++ b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c @@ -192,15 +192,15 @@ static int sharp_nt_panel_enable(struct drm_panel *panel) } static const struct drm_display_mode default_mode = { - .clock = 41118, + .clock = (540 + 48 + 32 + 80) * (960 + 3 + 10 + 15) * 60 / 1000, .hdisplay = 540, .hsync_start = 540 + 48, - .hsync_end = 540 + 48 + 80, - .htotal = 540 + 48 + 80 + 32, + .hsync_end = 540 + 48 + 32, + .htotal = 540 + 48 + 32 + 80, .vdisplay = 960, .vsync_start = 960 + 3, - .vsync_end = 960 + 3 + 15, - .vtotal = 960 + 3 + 15 + 1, + .vsync_end = 960 + 3 + 10, + .vtotal = 960 + 3 + 10 + 15, }; static int sharp_nt_panel_get_modes(struct drm_panel *panel, @@ -280,6 +280,7 @@ static int sharp_nt_panel_probe(struct mipi_dsi_device *dsi) dsi->lanes = 2; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | + MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_EOT_PACKET; From 8ee24ddf45f018cda76be99302362d5a6488e2c3 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 5 May 2023 23:37:29 +0200 Subject: [PATCH 111/509] ARM: dts: stm32: Move ethernet MAC EEPROM from SoM to carrier boards [ Upstream commit 9660efc2af37f3c12dc6e6a5511ad99e0addc297 ] The ethernet MAC EEPROM is not populated on the SoM itself, it has to be populated on each carrier board. Move the EEPROM into the correct place in DTs, i.e. the carrier board DTs. Add label to the EEPROM too. Fixes: 7e76f82acd9e1 ("ARM: dts: stm32: Split Avenger96 into DHCOR SoM and Avenger96 board") Signed-off-by: Marek Vasut Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi | 6 ++++++ arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi index 723b39bb2129..c43cf62736a6 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi @@ -232,6 +232,12 @@ adv7513_i2s0: endpoint { }; }; }; + + dh_mac_eeprom: eeprom@53 { + compatible = "atmel,24c02"; + reg = <0x53>; + pagesize = <16>; + }; }; <dc { diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 5af32140e128..7dba02e9ba6d 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -167,12 +167,6 @@ watchdog { status = "disabled"; }; }; - - eeprom@53 { - compatible = "atmel,24c02"; - reg = <0x53>; - pagesize = <16>; - }; }; &iwdg2 { From 726fdf47c148f0c2daa03483e41b04c3ac3e1de9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 17 May 2023 10:04:16 +0300 Subject: [PATCH 112/509] bus: ti-sysc: Fix dispc quirk masking bool variables [ Upstream commit f620596fa347170852da499e778a5736d79a4b79 ] Fix warning drivers/bus/ti-sysc.c:1806 sysc_quirk_dispc() warn: masking a bool. While at it let's add a comment for what were doing to make the code a bit easier to follow. Fixes: 7324a7a0d5e2 ("bus: ti-sysc: Implement display subsystem reset quirk") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-omap/a8ec8a68-9c2c-4076-bf47-09fccce7659f@kili.mountain/ Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- drivers/bus/ti-sysc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 4ee20be76508..4b1641fe30db 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1748,7 +1748,7 @@ static u32 sysc_quirk_dispc(struct sysc *ddata, int dispc_offset, if (!ddata->module_va) return -EIO; - /* DISP_CONTROL */ + /* DISP_CONTROL, shut down lcd and digit on disable if enabled */ val = sysc_read(ddata, dispc_offset + 0x40); lcd_en = val & lcd_en_mask; digit_en = val & digit_en_mask; @@ -1760,7 +1760,7 @@ static u32 sysc_quirk_dispc(struct sysc *ddata, int dispc_offset, else irq_mask |= BIT(2) | BIT(3); /* EVSYNC bits */ } - if (disable & (lcd_en | digit_en)) + if (disable && (lcd_en || digit_en)) sysc_write(ddata, dispc_offset + 0x40, val & ~(lcd_en_mask | digit_en_mask)); From 2a9895df808872c3f23ef0808e216a2a56c170c3 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Tue, 21 Feb 2023 11:50:37 +0100 Subject: [PATCH 113/509] arm64: dts: microchip: sparx5: do not use PSCI on reference boards [ Upstream commit 70be83708c925b3f72c508e4756e48ad2330c830 ] PSCI is not implemented on SparX-5 at all, there is no ATF and U-boot that is shipped does not implement it as well. I have tried flashing the latest BSP 2022.12 U-boot which did not work. After contacting Microchip, they confirmed that there is no ATF for the SoC nor PSCI implementation which is unfortunate in 2023. So, disable PSCI as otherwise kernel crashes as soon as it tries probing PSCI with, and the crash is only visible if earlycon is used. Since PSCI is not implemented, switch core bringup to use spin-tables which are implemented in the vendor U-boot and actually work. Tested on PCB134 with eMMC (VSC5640EV). Fixes: 6694aee00a4b ("arm64: dts: sparx5: Add basic cpu support") Signed-off-by: Robert Marko Acked-by: Steen Hegelund Link: https://lore.kernel.org/r/20230221105039.316819-1-robert.marko@sartura.hr Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/microchip/sparx5.dtsi | 2 +- arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/microchip/sparx5.dtsi b/arch/arm64/boot/dts/microchip/sparx5.dtsi index 3cb01c39c3c8..8dd679fbeed1 100644 --- a/arch/arm64/boot/dts/microchip/sparx5.dtsi +++ b/arch/arm64/boot/dts/microchip/sparx5.dtsi @@ -61,7 +61,7 @@ arm-pmu { interrupt-affinity = <&cpu0>, <&cpu1>; }; - psci { + psci: psci { compatible = "arm,psci-0.2"; method = "smc"; }; diff --git a/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi b/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi index 9d1a082de3e2..32bb76b3202a 100644 --- a/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi +++ b/arch/arm64/boot/dts/microchip/sparx5_pcb_common.dtsi @@ -6,6 +6,18 @@ /dts-v1/; #include "sparx5.dtsi" +&psci { + status = "disabled"; +}; + +&cpu0 { + enable-method = "spin-table"; +}; + +&cpu1 { + enable-method = "spin-table"; +}; + &uart0 { status = "okay"; }; From 7080ef46ad3da5ff592bec173829e5f8cefca955 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 18 May 2023 23:48:11 -0700 Subject: [PATCH 114/509] RDMA/bnxt_re: Disable/kill tasklet only if it is enabled [ Upstream commit ab112ee7899d6171da5acd77a7ed7ae103f488de ] When the ulp hook to start the IRQ fails because the rings are not available, tasklets are not enabled. In this case when the driver is unloaded, driver calls CREQ tasklet_kill. This causes an indefinite hang as the tasklet is not enabled. Driver shouldn't call tasklet_kill if it is not enabled. So using the creq->requested and nq->requested flags to identify if both tasklets/irqs are registered. Checking this flag while scheduling the tasklet from ISR. Also, added a cleanup for disabling tasklet, in case request_irq fails during start_irq. Check for return value for bnxt_qplib_rcfw_start_irq and in case the bnxt_qplib_rcfw_start_irq fails, return bnxt_re_start_irq without attempting to start NQ IRQs. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684478897-12247-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 12 +++++++++--- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 16 ++++++++++------ drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 14 +++++++++----- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 9ef6aea29ff1..bdde44286d56 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -294,15 +294,21 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) for (indx = 0; indx < rdev->num_msix; indx++) rdev->msix_entries[indx].vector = ent[indx].vector; - bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, - false); + rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, + false); + if (rc) { + ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); + return; + } for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { nq = &rdev->nq[indx - 1]; rc = bnxt_qplib_nq_start_irq(nq, indx - 1, msix_ent[indx].vector, false); - if (rc) + if (rc) { ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n", indx - 1); + return; + } } } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b26a89187a19..9eba4b39c703 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -404,6 +404,9 @@ static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance) void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) { + if (!nq->requested) + return; + tasklet_disable(&nq->nq_tasklet); /* Mask h/w interrupt */ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false); @@ -411,11 +414,10 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) synchronize_irq(nq->msix_vec); if (kill) tasklet_kill(&nq->nq_tasklet); - if (nq->requested) { - irq_set_affinity_hint(nq->msix_vec, NULL); - free_irq(nq->msix_vec, nq); - nq->requested = false; - } + + irq_set_affinity_hint(nq->msix_vec, NULL); + free_irq(nq->msix_vec, nq); + nq->requested = false; } void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) @@ -454,8 +456,10 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx); rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq); - if (rc) + if (rc) { + tasklet_disable(&nq->nq_tasklet); return rc; + } cpumask_clear(&nq->mask); cpumask_set_cpu(nq_indx, &nq->mask); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 5759027914b0..a111e880276f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -633,6 +633,10 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) struct bnxt_qplib_creq_ctx *creq; creq = &rcfw->creq; + + if (!creq->requested) + return; + tasklet_disable(&creq->creq_tasklet); /* Mask h/w interrupts */ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false); @@ -641,10 +645,8 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) if (kill) tasklet_kill(&creq->creq_tasklet); - if (creq->requested) { - free_irq(creq->msix_vec, rcfw); - creq->requested = false; - } + free_irq(creq->msix_vec, rcfw); + creq->requested = false; } void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) @@ -690,8 +692,10 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, tasklet_enable(&creq->creq_tasklet); rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, "bnxt_qplib_creq", rcfw); - if (rc) + if (rc) { + tasklet_disable(&creq->creq_tasklet); return rc; + } creq->requested = true; bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); From 11bd3882c3a6021bf73815343ee811558452091e Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 23:48:12 -0700 Subject: [PATCH 115/509] RDMA/bnxt_re: Fix to remove unnecessary return labels [ Upstream commit 9b3ee47796f529e5bc31a355d6cb756d68a7079a ] If there is no cleanup needed then just return directly. This cleans up the code and improve readability. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684478897-12247-3-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Kashyap Desai Reviewed-by: Saravanan Vajravel Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 9eba4b39c703..b4b180652c0a 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1603,7 +1603,7 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, il_src = (void *)wqe->sg_list[indx].addr; t_len += len; if (t_len > qp->max_inline_data) - goto bad; + return -ENOMEM; while (len) { if (pull_dst) { pull_dst = false; @@ -1627,8 +1627,6 @@ static int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp, } return t_len; -bad: - return -ENOMEM; } static u32 bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq, @@ -2058,7 +2056,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) hwq_attr.sginfo = &cq->sg_info; rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr); if (rc) - goto exit; + return rc; RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags); @@ -2099,7 +2097,6 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq) fail: bnxt_qplib_free_hwq(res, &cq->hwq); -exit: return rc; } From b54b26ac50a2c9580bf0e3158ef609f038e324c1 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 23:48:13 -0700 Subject: [PATCH 116/509] RDMA/bnxt_re: Use unique names while registering interrupts [ Upstream commit ff2e4bfd162cf66a112a81509e419805add44d64 ] bnxt_re currently uses the names "bnxt_qplib_creq" and "bnxt_qplib_nq-0" while registering IRQs. There is no way to distinguish the IRQs of different device ports when there are multiple IB devices registered. This could make the scenarios worse where one want to pin IRQs of a device port to certain CPUs. Fixed the code to use unique names which has PCI BDF information while registering interrupts like: "bnxt_re-nq-0@pci:0000:65:00.0" and "bnxt_re-creq@pci:0000:65:00.1". Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684478897-12247-4-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Bhargava Chenna Marreddy Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 12 ++++++++++-- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 +- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 15 +++++++++++++-- drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 1 + 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index b4b180652c0a..d6b7c0d1f676 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -417,6 +417,8 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill) irq_set_affinity_hint(nq->msix_vec, NULL); free_irq(nq->msix_vec, nq); + kfree(nq->name); + nq->name = NULL; nq->requested = false; } @@ -443,6 +445,7 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq) int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, int msix_vector, bool need_init) { + struct bnxt_qplib_res *res = nq->res; int rc; if (nq->requested) @@ -454,9 +457,14 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, else tasklet_enable(&nq->nq_tasklet); - snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx); + nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s", + nq_indx, pci_name(res->pdev)); + if (!nq->name) + return -ENOMEM; rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq); if (rc) { + kfree(nq->name); + nq->name = NULL; tasklet_disable(&nq->nq_tasklet); return rc; } @@ -470,7 +478,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->msix_vec, nq_indx); } nq->requested = true; - bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true); + bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index f50784405e27..667f93d90045 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -469,7 +469,7 @@ typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq, struct bnxt_qplib_nq { struct pci_dev *pdev; struct bnxt_qplib_res *res; - char name[32]; + char *name; struct bnxt_qplib_hwq hwq; struct bnxt_qplib_nq_db nq_db; u16 ring_id; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index a111e880276f..4836bc433f53 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -646,6 +646,8 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill) tasklet_kill(&creq->creq_tasklet); free_irq(creq->msix_vec, rcfw); + kfree(creq->irq_name); + creq->irq_name = NULL; creq->requested = false; } @@ -678,9 +680,11 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, bool need_init) { struct bnxt_qplib_creq_ctx *creq; + struct bnxt_qplib_res *res; int rc; creq = &rcfw->creq; + res = rcfw->res; if (creq->requested) return -EFAULT; @@ -690,15 +694,22 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq); else tasklet_enable(&creq->creq_tasklet); + + creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s", + pci_name(res->pdev)); + if (!creq->irq_name) + return -ENOMEM; rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, - "bnxt_qplib_creq", rcfw); + creq->irq_name, rcfw); if (rc) { + kfree(creq->irq_name); + creq->irq_name = NULL; tasklet_disable(&creq->creq_tasklet); return rc; } creq->requested = true; - bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, true); + bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 6953f4e53dd2..7df7170c80e0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -172,6 +172,7 @@ struct bnxt_qplib_creq_ctx { u16 ring_id; int msix_vec; bool requested; /*irq handler installed */ + char *irq_name; }; /* RCFW Communication Channels */ From 446092f136d319a8fdb734270b797f969b683944 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 23:48:14 -0700 Subject: [PATCH 117/509] RDMA/bnxt_re: Remove a redundant check inside bnxt_re_update_gid [ Upstream commit b989f90cef0af48aa5679b6a75476371705ec53c ] The NULL check inside bnxt_re_update_gid() always return false. If sgid_tbl->tbl is not allocated, then dev_init would have failed. Fixes: 5fac5b1b297f ("RDMA/bnxt_re: Add vlan tag for untagged RoCE traffic when PFC is configured") Link: https://lore.kernel.org/r/1684478897-12247-5-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Saravanan Vajravel Reviewed-by: Damodharam Ammepalli Reviewed-by: Ajit Khaparde Signed-off-by: Selvin Xavier Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index bdde44286d56..8a618769915d 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1191,12 +1191,6 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) if (!ib_device_try_get(&rdev->ibdev)) return 0; - if (!sgid_tbl) { - ibdev_err(&rdev->ibdev, "QPLIB: SGID table not allocated"); - rc = -EINVAL; - goto out; - } - for (index = 0; index < sgid_tbl->active; index++) { gid_idx = sgid_tbl->hw_id[index]; @@ -1214,7 +1208,7 @@ static int bnxt_re_update_gid(struct bnxt_re_dev *rdev) rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx, rdev->qplib_res.netdev->dev_addr); } -out: + ib_device_put(&rdev->ibdev); return rc; } From b0b180a712ee8b643f2d592608d2af45fd151e47 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Thu, 18 May 2023 23:48:15 -0700 Subject: [PATCH 118/509] RDMA/bnxt_re: Fix to remove an unnecessary log [ Upstream commit 43774bc156614346fe5dacabc8e8c229167f2536 ] During destroy_qp, driver sets the qp handle in the existing CQEs belonging to the QP being destroyed to NULL. As a result, a poll_cq after destroy_qp can report unnecessary messages. Remove this noise from system logs. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://lore.kernel.org/r/1684478897-12247-6-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d6b7c0d1f676..d44b6a5c90b5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2732,11 +2732,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq, qp = (struct bnxt_qplib_qp *)((unsigned long) le64_to_cpu(hwcqe->qp_handle)); - if (!qp) { - dev_err(&cq->hwq.pdev->dev, - "FP: CQ Process terminal qp is NULL\n"); + if (!qp) return -EINVAL; - } /* Must block new posting of SQ and RQ */ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR; From e8b131d216380fc0105649c0ef733067e379fe6a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 17 May 2023 13:32:25 +0300 Subject: [PATCH 119/509] ARM: dts: gta04: Move model property out of pinctrl node [ Upstream commit 4ffec92e70ac5097b9f67ec154065305b16a3b46 ] The model property should be at the top level, let's move it out of the pinctrl node. Fixes: d2eaf949d2c3 ("ARM: dts: omap3-gta04a5one: define GTA04A5 variant with OneNAND") Cc: Andreas Kemnade Cc: H. Nikolaus Schaller Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3-gta04a5one.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap3-gta04a5one.dts b/arch/arm/boot/dts/omap3-gta04a5one.dts index 9db9fe67cd63..95df45cc70c0 100644 --- a/arch/arm/boot/dts/omap3-gta04a5one.dts +++ b/arch/arm/boot/dts/omap3-gta04a5one.dts @@ -5,9 +5,11 @@ #include "omap3-gta04a5.dts" -&omap3_pmx_core { +/ { model = "Goldelico GTA04A5/Letux 2804 with OneNAND"; +}; +&omap3_pmx_core { gpmc_pins: pinmux_gpmc_pins { pinctrl-single,pins = < From 160ac75a5a82ee8af929b9db7246b0e92d10df7a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 19 Apr 2023 23:18:40 +0200 Subject: [PATCH 120/509] arm64: dts: qcom: msm8916: correct camss unit address [ Upstream commit 48798d992ce276cf0d57bf75318daf8eabd02aa4 ] Match unit-address to reg entry to fix dtbs W=1 warnings: Warning (simple_bus_reg): /soc@0/camss@1b00000: simple-bus unit address format error, expected "1b0ac00" Fixes: 58f479f90a7c ("arm64: dts: qcom: msm8916: Add CAMSS support") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230419211856.79332-2-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index c32e4a3833f2..5b79e4a37331 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1006,7 +1006,7 @@ dsi_phy0: dsi-phy@1a98300 { }; }; - camss: camss@1b00000 { + camss: camss@1b0ac00 { compatible = "qcom,msm8916-camss"; reg = <0x01b0ac00 0x200>, <0x01b00030 0x4>, From 6d103b1cc13367d6b5369b6956ae6cc4c16bff9e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 19 Apr 2023 23:18:46 +0200 Subject: [PATCH 121/509] arm64: dts: qcom: msm8994: correct SPMI unit address [ Upstream commit 24f0f6a8059c7108d4ee3476c95db1e7ff4feb79 ] Match unit-address to reg entry to fix dtbs W=1 warnings: Warning (simple_bus_reg): /soc/spmi@fc4c0000: simple-bus unit address format error, expected "fc4cf000" Fixes: b0ad598f8ec0 ("arm64: dts: qcom: msm8994: Add SPMI PMIC arbiter device") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230419211856.79332-8-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8994.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index aeb5762566e9..caaf7102f579 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -489,7 +489,7 @@ restart@fc4ab000 { reg = <0xfc4ab000 0x4>; }; - spmi_bus: spmi@fc4c0000 { + spmi_bus: spmi@fc4cf000 { compatible = "qcom,spmi-pmic-arb"; reg = <0xfc4cf000 0x1000>, <0xfc4cb000 0x1000>, From 02d8b008ffeefae0bac88e14ae2479bc017fb3d2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 19 Apr 2023 23:18:47 +0200 Subject: [PATCH 122/509] arm64: dts: qcom: msm8996: correct camss unit address [ Upstream commit e959ced1d0e5ef0b1f66a0c2d0e1ae80790e5ca5 ] Match unit-address to reg entry to fix dtbs W=1 warnings: Warning (simple_bus_reg): /soc/camss@a00000: simple-bus unit address format error, expected "a34000" Fixes: e0531312e78f ("arm64: dts: qcom: msm8996: Add CAMSS support") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230419211856.79332-9-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 159cdd03e7c0..73f7490911c9 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -956,7 +956,7 @@ ufsphy_lane: lanes@627400 { }; }; - camss: camss@a00000 { + camss: camss@a34000 { compatible = "qcom,msm8996-camss"; reg = <0x00a34000 0x1000>, <0x00a00030 0x4>, From b574cd7e4dfc1de10aea8fe4bc28daed31883652 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Tue, 16 May 2023 10:50:39 +0200 Subject: [PATCH 123/509] drm/panel: simple: fix active size for Ampire AM-480272H3TMQW-T01H [ Upstream commit f24b49550814fdee4a98b9552e35e243ccafd4a8 ] The previous setting was related to the overall dimension and not to the active display area. In the "PHYSICAL SPECIFICATIONS" section, the datasheet shows the following parameters: ---------------------------------------------------------- | Item | Specifications | unit | ---------------------------------------------------------- | Display area | 98.7 (W) x 57.5 (H) | mm | ---------------------------------------------------------- | Overall dimension | 105.5(W) x 67.2(H) x 4.96(D) | mm | ---------------------------------------------------------- Fixes: 966fea78adf2 ("drm/panel: simple: Add support for Ampire AM-480272H3TMQW-T01H") Signed-off-by: Dario Binacchi Reviewed-by: Neil Armstrong [narmstrong: fixed Fixes commit id length] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230516085039.3797303-1-dario.binacchi@amarulasolutions.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 1a87cc445b5e..b0b92f436879 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -704,8 +704,8 @@ static const struct panel_desc ampire_am_480272h3tmqw_t01h = { .num_modes = 1, .bpc = 8, .size = { - .width = 105, - .height = 67, + .width = 99, + .height = 58, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X24, }; From aec18da74194f35e62e258b58ea456e9a3b0a23a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 17:30:58 +0200 Subject: [PATCH 124/509] ARM: ep93xx: fix missing-prototype warnings [ Upstream commit 419013740ea1e4343d8ade535d999f59fa28e460 ] ep93xx_clocksource_read() is only called from the file it is declared in, while ep93xx_timer_init() is declared in a header that is not included here. arch/arm/mach-ep93xx/timer-ep93xx.c:120:13: error: no previous prototype for 'ep93xx_timer_init' arch/arm/mach-ep93xx/timer-ep93xx.c:63:5: error: no previous prototype for 'ep93xx_clocksource_read' Fixes: 000bc17817bf ("ARM: ep93xx: switch to GENERIC_CLOCKEVENTS") Acked-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20230516153109.514251-3-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-ep93xx/timer-ep93xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-ep93xx/timer-ep93xx.c b/arch/arm/mach-ep93xx/timer-ep93xx.c index dd4b164d1831..a9efa7bc2fa1 100644 --- a/arch/arm/mach-ep93xx/timer-ep93xx.c +++ b/arch/arm/mach-ep93xx/timer-ep93xx.c @@ -9,6 +9,7 @@ #include #include #include "soc.h" +#include "platform.h" /************************************************************************* * Timer handling for EP93xx @@ -60,7 +61,7 @@ static u64 notrace ep93xx_read_sched_clock(void) return ret; } -u64 ep93xx_clocksource_read(struct clocksource *c) +static u64 ep93xx_clocksource_read(struct clocksource *c) { u64 ret; From c85a076215a9f80b8456ed6d217fd19dfc7fafcb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 17:31:04 +0200 Subject: [PATCH 125/509] ARM: omap2: fix missing tick_broadcast() prototype [ Upstream commit 861bc1d2886d47bd57a2cbf2cda87fdbe3eb9d08 ] omap2 contains a hack to define tick_broadcast() on non-SMP configurations in place of the normal SMP definition. This one causes a warning because of a missing prototype: arch/arm/mach-omap2/board-generic.c:44:6: error: no previous prototype for 'tick_broadcast' Make sure to always include the header with the declaration. Fixes: d86ad463d670 ("ARM: OMAP2+: Fix regression for using local timer on non-SMP SoCs") Acked-by: Aaro Koskinen Link: https://lore.kernel.org/r/20230516153109.514251-9-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin --- arch/arm/mach-omap2/board-generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 1610c567a6a3..10d2f078e4a8 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include From 392ee3cc995dd297ce575e08d16b46a1f6fcdde6 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 7 May 2023 19:45:16 +0200 Subject: [PATCH 126/509] arm64: dts: qcom: apq8096: fix fixed regulator name property [ Upstream commit c77612a07d18d4425fd8ddd532a8a9b8e1970c53 ] Correct the typo in 'regulator-name' property. apq8096-ifc6640.dtb: v1p05-regulator: 'regulator-name' is a required property apq8096-ifc6640.dtb: v1p05-regulator: Unevaluated properties are not allowed ('reglator-name' was unexpected) Fixes: 6cbdec2d3ca6 ("arm64: dts: qcom: msm8996: Introduce IFC6640") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230507174516.264936-3-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts b/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts index f6ddf17ada81..861b356a982b 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts +++ b/arch/arm64/boot/dts/qcom/apq8096-ifc6640.dts @@ -26,7 +26,7 @@ chosen { v1p05: v1p05-regulator { compatible = "regulator-fixed"; - reglator-name = "v1p05"; + regulator-name = "v1p05"; regulator-always-on; regulator-boot-on; @@ -38,7 +38,7 @@ v1p05: v1p05-regulator { v12_poe: v12-poe-regulator { compatible = "regulator-fixed"; - reglator-name = "v12_poe"; + regulator-name = "v12_poe"; regulator-always-on; regulator-boot-on; From ddc74d6ea3dcf7fd6e6d1f46dd0351678898b1a7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 18 May 2023 02:42:32 +0200 Subject: [PATCH 127/509] ARM: dts: stm32: Shorten the AV96 HDMI sound card name [ Upstream commit 0cf765e598712addec34d0208cc1418c151fefb2 ] Fix the following error in kernel log due to too long sound card name: " asoc-audio-graph-card sound: ASoC: driver name too long 'STM32MP1-AV96-HDMI' -> 'STM32MP1-AV96-H' " Fixes: e027da342772 ("ARM: dts: stm32: Add bindings for audio on AV96") Signed-off-by: Marek Vasut Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi index c43cf62736a6..d8547307a950 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi @@ -88,7 +88,7 @@ sd_switch: regulator-sd_switch { sound { compatible = "audio-graph-card"; - label = "STM32MP1-AV96-HDMI"; + label = "STM32-AV96-HDMI"; dais = <&sai2a_port>; status = "okay"; }; From 105af71974eacb1e7abe3bd64c0864693db5374e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sat, 13 May 2023 13:29:31 +0200 Subject: [PATCH 128/509] memory: brcmstb_dpfe: fix testing array offset after use [ Upstream commit 1d9e93fad549bc38f593147479ee063f2872c170 ] Code should first check for valid value of array offset, then use it as the index. Fixes smatch warning: drivers/memory/brcmstb_dpfe.c:443 __send_command() error: testing array offset 'cmd' after use. Fixes: 2f330caff577 ("memory: brcmstb: Add driver for DPFE") Acked-by: Markus Mayer Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20230513112931.176066-1-krzysztof.kozlowski@linaro.org Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/memory/brcmstb_dpfe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/memory/brcmstb_dpfe.c b/drivers/memory/brcmstb_dpfe.c index f43ba69fbb3e..2daae2e0cb19 100644 --- a/drivers/memory/brcmstb_dpfe.c +++ b/drivers/memory/brcmstb_dpfe.c @@ -434,15 +434,17 @@ static void __finalize_command(struct brcmstb_dpfe_priv *priv) static int __send_command(struct brcmstb_dpfe_priv *priv, unsigned int cmd, u32 result[]) { - const u32 *msg = priv->dpfe_api->command[cmd]; void __iomem *regs = priv->regs; unsigned int i, chksum, chksum_idx; + const u32 *msg; int ret = 0; u32 resp; if (cmd >= DPFE_CMD_MAX) return -1; + msg = priv->dpfe_api->command[cmd]; + mutex_lock(&priv->lock); /* Wait for DCPU to become ready */ From d883e16c7f35478ad0950f4cb4f6bba011345b6c Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 30 May 2023 21:11:38 +0300 Subject: [PATCH 129/509] ASoC: es8316: Increment max value for ALC Capture Target Volume control [ Upstream commit 6f073429037cd79d7311cd8236311c53f5ea8f01 ] The following error occurs when trying to restore a previously saved ALSA mixer state (tested on a Rock 5B board): $ alsactl --no-ucm -f /tmp/asound.state store hw:Analog $ alsactl --no-ucm -I -f /tmp/asound.state restore hw:Analog alsactl: set_control:1475: Cannot write control '2:0:0:ALC Capture Target Volume:0' : Invalid argument According to ES8316 datasheet, the register at address 0x2B, which is related to the above mixer control, contains by default the value 0xB0. Considering the corresponding ALC target bits (ALCLVL) are 7:4, the control is initialized with 11, which is one step above the maximum value allowed by the driver: ALCLVL | dB gain -------+-------- 0000 | -16.5 0001 | -15.0 0010 | -13.5 .... | ..... 0111 | -6.0 1000 | -4.5 1001 | -3.0 1010 | -1.5 .... | ..... 1111 | -1.5 The tests performed using the VU meter feature (--vumeter=TYPE) of arecord/aplay confirm the specs are correct and there is no measured gain if the 1011-1111 range would have been mapped to 0 dB: dB gain | VU meter % --------+----------- -6.0 | 30-31 -4.5 | 35-36 -3.0 | 42-43 -1.5 | 50-51 0.0 | 50-51 Increment the max value allowed for ALC Capture Target Volume control, so that it matches the hardware default. Additionally, update the related TLV to prevent an artificial extension of the dB gain range. Fixes: b8b88b70875a ("ASoC: add es8316 codec driver") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20230530181140.483936-2-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/es8316.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index bc3d46617a11..423d9ce2df26 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -52,7 +52,12 @@ static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(dac_vol_tlv, -9600, 50, 1); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(adc_vol_tlv, -9600, 50, 1); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_max_gain_tlv, -650, 150, 0); static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_min_gain_tlv, -1200, 150, 0); -static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(alc_target_tlv, -1650, 150, 0); + +static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(alc_target_tlv, + 0, 10, TLV_DB_SCALE_ITEM(-1650, 150, 0), + 11, 11, TLV_DB_SCALE_ITEM(-150, 0, 0), +); + static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(hpmixer_gain_tlv, 0, 4, TLV_DB_SCALE_ITEM(-1200, 150, 0), 8, 11, TLV_DB_SCALE_ITEM(-450, 150, 0), @@ -115,7 +120,7 @@ static const struct snd_kcontrol_new es8316_snd_controls[] = { alc_max_gain_tlv), SOC_SINGLE_TLV("ALC Capture Min Volume", ES8316_ADC_ALC2, 0, 28, 0, alc_min_gain_tlv), - SOC_SINGLE_TLV("ALC Capture Target Volume", ES8316_ADC_ALC3, 4, 10, 0, + SOC_SINGLE_TLV("ALC Capture Target Volume", ES8316_ADC_ALC3, 4, 11, 0, alc_target_tlv), SOC_SINGLE("ALC Capture Hold Time", ES8316_ADC_ALC3, 0, 10, 0), SOC_SINGLE("ALC Capture Decay Time", ES8316_ADC_ALC4, 4, 10, 0), From 684a2f180e4632b40e8b8e4cf91879f274ab448b Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 30 May 2023 21:11:39 +0300 Subject: [PATCH 130/509] ASoC: es8316: Do not set rate constraints for unsupported MCLKs [ Upstream commit 60413129ee2b38a80347489270af7f6e1c1de4d0 ] When using the codec through the generic audio graph card, there are at least two calls of es8316_set_dai_sysclk(), with the effect of limiting the allowed sample rates according to the MCLK/LRCK ratios supported by the codec: 1. During audio card setup, to set the initial MCLK - see asoc_simple_init_dai(). 2. Before opening a stream, to update MCLK, according to the stream sample rate and the multiplication factor - see asoc_simple_hw_params(). In some cases the initial MCLK might be set to a frequency that doesn't match any of the supported ratios, e.g. 12287999 instead of 12288000, which is only 1 Hz below the supported clock, as that is what the hardware reports. This creates an empty list of rate constraints, which is further passed to snd_pcm_hw_constraint_list() via es8316_pcm_startup(), and causes the following error on the very first access of the sound card: $ speaker-test -D hw:Analog,0 -F S16_LE -c 2 -t wav Broken configuration for playback: no configurations available: Invalid argument Setting of hwparams failed: Invalid argument Note that all subsequent retries succeed thanks to the updated MCLK set at point 2 above, which uses a computed frequency value instead of a reading from the hardware registers. Normally this would have mitigated the issue, but es8316_pcm_startup() executes before the 2nd call to es8316_set_dai_sysclk(), hence it cannot make use of the updated constraints. Since es8316_pcm_hw_params() performs anyway a final validation of MCLK against the stream sample rate and the supported MCLK/LRCK ratios, fix the issue by ensuring that sysclk_constraints list is only set when at least one supported sample rate is autodetected by the codec. Fixes: b8b88b70875a ("ASoC: add es8316 codec driver") Signed-off-by: Cristian Ciocaltea Link: https://lore.kernel.org/r/20230530181140.483936-3-cristian.ciocaltea@collabora.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/es8316.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c index 423d9ce2df26..03ad34a275da 100644 --- a/sound/soc/codecs/es8316.c +++ b/sound/soc/codecs/es8316.c @@ -369,13 +369,11 @@ static int es8316_set_dai_sysclk(struct snd_soc_dai *codec_dai, int count = 0; es8316->sysclk = freq; + es8316->sysclk_constraints.list = NULL; + es8316->sysclk_constraints.count = 0; - if (freq == 0) { - es8316->sysclk_constraints.list = NULL; - es8316->sysclk_constraints.count = 0; - + if (freq == 0) return 0; - } ret = clk_set_rate(es8316->mclk, freq); if (ret) @@ -391,8 +389,10 @@ static int es8316_set_dai_sysclk(struct snd_soc_dai *codec_dai, es8316->allowed_rates[count++] = freq / ratio; } - es8316->sysclk_constraints.list = es8316->allowed_rates; - es8316->sysclk_constraints.count = count; + if (count) { + es8316->sysclk_constraints.list = es8316->allowed_rates; + es8316->sysclk_constraints.count = count; + } return 0; } From 9c14d1406662c3d203d4d007b7abdafed4252d59 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 16 May 2023 22:30:29 +0200 Subject: [PATCH 131/509] ARM: dts: meson8: correct uart_B and uart_C clock references [ Upstream commit 98b503c7fb13a17a47d8ebf15fa8f7c10118e75c ] On Meson8 uart_B and uart_C do not work, because they are relying on incorrect clocks. Change the references of pclk to the correct CLKID (UART1 for uart_B and UART2 for uart_C), to allow use of the two uarts. This was originally reported by Hans-Frieder Vogt for Meson8b [0], but the same bug is also present in meson8.dtsi [0] https://lore.kernel.org/linux-amlogic/trinity-bf20bcb9-790b-4ab9-99e3-0831ef8257f4-1680878185420@3c-app-gmx-bap55/ Fixes: 57007bfb5469 ("ARM: dts: meson8: Fix the UART device-tree schema validation") Reported-by: Hans-Frieder Vogt # for meson8b.dtsi Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230516203029.1031174-1-martin.blumenstingl@googlemail.com Signed-off-by: Neil Armstrong Signed-off-by: Sasha Levin --- arch/arm/boot/dts/meson8.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 08533116a39c..0d045add8165 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -611,13 +611,13 @@ &uart_A { &uart_B { compatible = "amlogic,meson8-uart"; - clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clocks = <&xtal>, <&clkc CLKID_UART1>, <&clkc CLKID_CLK81>; clock-names = "xtal", "pclk", "baud"; }; &uart_C { compatible = "amlogic,meson8-uart"; - clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clocks = <&xtal>, <&clkc CLKID_UART2>, <&clkc CLKID_CLK81>; clock-names = "xtal", "pclk", "baud"; }; From 42b6865bf58c53a9e0e845d75a517f3d613b8ebc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 21 May 2023 15:52:16 -0700 Subject: [PATCH 132/509] soc/fsl/qe: fix usb.c build errors [ Upstream commit 7b1a78babd0d2cd27aa07255dee0c2d7ac0f31e3 ] Fix build errors in soc/fsl/qe/usb.c when QUICC_ENGINE is not set. This happens when PPC_EP88XC is set, which selects CPM1 & CPM. When CPM is set, USB_FSL_QE can be set without QUICC_ENGINE being set. When USB_FSL_QE is set, QE_USB deafults to y, which causes build errors when QUICC_ENGINE is not set. Making QE_USB depend on QUICC_ENGINE prevents QE_USB from defaulting to y. Fixes these build errors: drivers/soc/fsl/qe/usb.o: in function `qe_usb_clock_set': usb.c:(.text+0x1e): undefined reference to `qe_immr' powerpc-linux-ld: usb.c:(.text+0x2a): undefined reference to `qe_immr' powerpc-linux-ld: usb.c:(.text+0xbc): undefined reference to `qe_setbrg' powerpc-linux-ld: usb.c:(.text+0xca): undefined reference to `cmxgcr_lock' powerpc-linux-ld: usb.c:(.text+0xce): undefined reference to `cmxgcr_lock' Fixes: 5e41486c408e ("powerpc/QE: add support for QE USB clocks routing") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: https://lore.kernel.org/all/202301101500.pillNv6R-lkp@intel.com/ Suggested-by: Michael Ellerman Cc: Christophe Leroy Cc: Leo Li Cc: Masahiro Yamada Cc: Nicolas Schier Cc: Qiang Zhao Cc: linuxppc-dev Cc: linux-arm-kernel@lists.infradead.org Cc: Kumar Gala Acked-by: Nicolas Schier Signed-off-by: Li Yang Signed-off-by: Sasha Levin --- drivers/soc/fsl/qe/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/fsl/qe/Kconfig b/drivers/soc/fsl/qe/Kconfig index 357c5800b112..7afa796dbbb8 100644 --- a/drivers/soc/fsl/qe/Kconfig +++ b/drivers/soc/fsl/qe/Kconfig @@ -39,6 +39,7 @@ config QE_TDM config QE_USB bool + depends on QUICC_ENGINE default y if USB_FSL_QE help QE USB Controller support From 2d38866a99baedcbf619e96ab8cc1b884124a459 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 25 Nov 2021 20:53:22 +0100 Subject: [PATCH 133/509] IB/hfi1: Use bitmap_zalloc() when applicable [ Upstream commit f86dbc9fc5d83384eae7eda0de17f823e8c81ca0 ] Use 'bitmap_zalloc()' to simplify code, improve the semantic and avoid some open-coded arithmetic in allocator arguments. Also change the corresponding 'kfree()' into 'bitmap_free()' to keep consistency. Link: https://lore.kernel.org/r/d46c6bc1869b8869244fa71943d2cad4104b3668.1637869925.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe Stable-dep-of: c9358de193ec ("IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/user_sdma.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 1eb5a44a4ae6..3f49633bf985 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -202,9 +202,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, if (!pq->reqs) goto pq_reqs_nomem; - pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size), - sizeof(*pq->req_in_use), - GFP_KERNEL); + pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL); if (!pq->req_in_use) goto pq_reqs_no_in_use; @@ -251,7 +249,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, cq_nomem: kmem_cache_destroy(pq->txreq_cache); pq_txreq_nomem: - kfree(pq->req_in_use); + bitmap_free(pq->req_in_use); pq_reqs_no_in_use: kfree(pq->reqs); pq_reqs_nomem: @@ -298,7 +296,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); - kfree(pq->req_in_use); + bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); kfree(pq); From 6cf8f3d690bb5ad31ef0f41a6206ecf5a068d179 Mon Sep 17 00:00:00 2001 From: Patrick Kelsey Date: Thu, 16 Feb 2023 11:56:28 -0500 Subject: [PATCH 134/509] IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors [ Upstream commit fd8958efe8779d3db19c9124fce593ce681ac709 ] Fix three sources of error involving struct sdma_txreq.num_descs. When _extend_sdma_tx_descs() extends the descriptor array, it uses the value of tx->num_descs to determine how many existing entries from the tx's original, internal descriptor array to copy to the newly allocated one. As this value was incremented before the call, the copy loop will access one entry past the internal descriptor array, copying its contents into the corresponding slot in the new array. If the call to _extend_sdma_tx_descs() fails, _pad_smda_tx_descs() then invokes __sdma_tx_clean() which uses the value of tx->num_desc to drive a loop that unmaps all descriptor entries in use. As this value was incremented before the call, the unmap loop will invoke sdma_unmap_desc() on a descriptor entry whose contents consist of whatever random data was copied into it during (1), leading to cascading further calls into the kernel and driver using arbitrary data. _sdma_close_tx() was using tx->num_descs instead of tx->num_descs - 1. Fix all of the above by: - Only increment .num_descs after .descp is extended. - Use .num_descs - 1 instead of .num_descs for last .descp entry. Fixes: f4d26d81ad7f ("staging/rdma/hfi1: Add coalescing support for SDMA TX descriptors") Link: https://lore.kernel.org/r/167656658879.2223096.10026561343022570690.stgit@awfm-02.cornelisnetworks.com Signed-off-by: Brendan Cunningham Signed-off-by: Patrick Kelsey Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Stable-dep-of: c9358de193ec ("IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/sdma.c | 4 ++-- drivers/infiniband/hw/hfi1/sdma.h | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 061562627dae..728bf122ee0a 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3187,8 +3187,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int rval = 0; - tx->num_desc++; - if ((unlikely(tx->num_desc == tx->desc_limit))) { + if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) { rval = _extend_sdma_tx_descs(dd, tx); if (rval) { __sdma_txclean(dd, tx); @@ -3203,6 +3202,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) NULL, dd->sdma_pad_phys, sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1))); + tx->num_desc++; _sdma_close_tx(dd, tx); return rval; } diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 7d4f316ac6e4..5a372ca1f6ac 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -674,14 +674,13 @@ static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx) static inline void _sdma_close_tx(struct hfi1_devdata *dd, struct sdma_txreq *tx) { - tx->descp[tx->num_desc].qw[0] |= - SDMA_DESC0_LAST_DESC_FLAG; - tx->descp[tx->num_desc].qw[1] |= - dd->default_desc1; + u16 last_desc = tx->num_desc - 1; + + tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG; + tx->descp[last_desc].qw[1] |= dd->default_desc1; if (tx->flags & SDMA_TXREQ_F_URGENT) - tx->descp[tx->num_desc].qw[1] |= - (SDMA_DESC1_HEAD_TO_HOST_FLAG | - SDMA_DESC1_INT_REQ_FLAG); + tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG | + SDMA_DESC1_INT_REQ_FLAG); } static inline int _sdma_txadd_daddr( @@ -700,6 +699,7 @@ static inline int _sdma_txadd_daddr( pinning_ctx, addr, len); WARN_ON(len > tx->tlen); + tx->num_desc++; tx->tlen -= len; /* special cases for last */ if (!tx->tlen) { @@ -711,7 +711,6 @@ static inline int _sdma_txadd_daddr( _sdma_close_tx(dd, tx); } } - tx->num_desc++; return rval; } From 7dcb9ea3ee4b40d7538a812a7edce35fe17a4d36 Mon Sep 17 00:00:00 2001 From: Brendan Cunningham Date: Fri, 19 May 2023 12:32:16 -0400 Subject: [PATCH 135/509] IB/hfi1: Fix wrong mmu_node used for user SDMA packet after invalidate [ Upstream commit c9358de193ecfb360c3ce75f27ce839ca0b0bc8c ] The hfi1 user SDMA pinned-page cache will leave a stale cache entry when the cache-entry's virtual address range is invalidated but that cache entry is in-use by an outstanding SDMA request. Subsequent user SDMA requests with buffers in or spanning the virtual address range of the stale cache entry will result in packets constructed from the wrong memory, the physical pages pointed to by the stale cache entry. To fix this, remove mmu_rb_node cache entries from the mmu_rb_handler cache independent of the cache entry's refcount. Add 'struct kref refcount' to struct mmu_rb_node and manage mmu_rb_node lifetime with kref_get() and kref_put(). mmu_rb_node.refcount makes sdma_mmu_node.refcount redundant. Remove 'atomic_t refcount' from struct sdma_mmu_node and change sdma_mmu_node code to use mmu_rb_node.refcount. Move the mmu_rb_handler destructor call after a wait-for-SDMA-request-completion call so mmu_rb_nodes that need mmu_rb_handler's workqueue to queue themselves up for destruction from an interrupt context may do so. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Fixes: 00cbce5cbf88 ("IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests") Link: https://lore.kernel.org/r/168451393605.3700681.13493776139032178861.stgit@awfm-02.cornelisnetworks.com Reviewed-by: Dean Luick Signed-off-by: Brendan Cunningham Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/ipoib_tx.c | 4 +- drivers/infiniband/hw/hfi1/mmu_rb.c | 101 ++++++++++------- drivers/infiniband/hw/hfi1/mmu_rb.h | 3 + drivers/infiniband/hw/hfi1/sdma.c | 23 +++- drivers/infiniband/hw/hfi1/sdma.h | 47 +++++--- drivers/infiniband/hw/hfi1/sdma_txreq.h | 2 + drivers/infiniband/hw/hfi1/user_sdma.c | 137 ++++++++++-------------- drivers/infiniband/hw/hfi1/user_sdma.h | 1 - drivers/infiniband/hw/hfi1/vnic_sdma.c | 4 +- 9 files changed, 177 insertions(+), 145 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index 956fc3fd88b9..188048468135 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -251,11 +251,11 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx, const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; ret = sdma_txadd_page(dd, - NULL, txreq, skb_frag_page(frag), frag->bv_offset, - skb_frag_size(frag)); + skb_frag_size(frag), + NULL, NULL, NULL); if (unlikely(ret)) break; } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index d331184ded30..a501b7a682fc 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -60,8 +60,7 @@ static int mmu_notifier_range_start(struct mmu_notifier *, const struct mmu_notifier_range *); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); -static void do_remove(struct mmu_rb_handler *handler, - struct list_head *del_list); +static void release_immediate(struct kref *refcount); static void handle_remove(struct work_struct *work); static const struct mmu_notifier_ops mn_opts = { @@ -144,7 +143,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) } spin_unlock_irqrestore(&handler->lock, flags); - do_remove(handler, &del_list); + while (!list_empty(&del_list)) { + rbnode = list_first_entry(&del_list, struct mmu_rb_node, list); + list_del(&rbnode->list); + kref_put(&rbnode->refcount, release_immediate); + } /* Now the mm may be freed. */ mmdrop(handler->mn.mm); @@ -172,12 +175,6 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, } __mmu_int_rb_insert(mnode, &handler->root); list_add_tail(&mnode->list, &handler->lru_list); - - ret = handler->ops->insert(handler->ops_arg, mnode); - if (ret) { - __mmu_int_rb_remove(mnode, &handler->root); - list_del(&mnode->list); /* remove from LRU list */ - } mnode->handler = handler; unlock: spin_unlock_irqrestore(&handler->lock, flags); @@ -221,6 +218,48 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, return node; } +/* + * Must NOT call while holding mnode->handler->lock. + * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a + * spinlock. + */ +static void release_immediate(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + mnode->handler->ops->remove(mnode->handler->ops_arg, mnode); +} + +/* Caller must hold mnode->handler->lock */ +static void release_nolock(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + list_move(&mnode->list, &mnode->handler->del_list); + queue_work(mnode->handler->wq, &mnode->handler->del_work); +} + +/* + * struct mmu_rb_node->refcount kref_put() callback. + * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues + * handler->del_work on handler->wq. + * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root. + * Acquires mmu_rb_node->handler->lock; do not call while already holding + * handler->lock. + */ +void hfi1_mmu_rb_release(struct kref *refcount) +{ + struct mmu_rb_node *mnode = + container_of(refcount, struct mmu_rb_node, refcount); + struct mmu_rb_handler *handler = mnode->handler; + unsigned long flags; + + spin_lock_irqsave(&handler->lock, flags); + list_move(&mnode->list, &mnode->handler->del_list); + spin_unlock_irqrestore(&handler->lock, flags); + queue_work(handler->wq, &handler->del_work); +} + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) { struct mmu_rb_node *rbnode, *ptr; @@ -235,6 +274,10 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) spin_lock_irqsave(&handler->lock, flags); list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) { + /* refcount == 1 implies mmu_rb_handler has only rbnode ref */ + if (kref_read(&rbnode->refcount) > 1) + continue; + if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, &stop)) { __mmu_int_rb_remove(rbnode, &handler->root); @@ -247,7 +290,7 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) spin_unlock_irqrestore(&handler->lock, flags); list_for_each_entry_safe(rbnode, ptr, &del_list, list) { - handler->ops->remove(handler->ops_arg, rbnode); + kref_put(&rbnode->refcount, release_immediate); } } @@ -259,7 +302,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, struct rb_root_cached *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; - bool added = false; spin_lock_irqsave(&handler->lock, flags); for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1); @@ -268,38 +310,16 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, ptr = __mmu_int_rb_iter_next(node, range->start, range->end - 1); trace_hfi1_mmu_mem_invalidate(node->addr, node->len); - if (handler->ops->invalidate(handler->ops_arg, node)) { - __mmu_int_rb_remove(node, root); - /* move from LRU list to delete list */ - list_move(&node->list, &handler->del_list); - added = true; - } + /* Remove from rb tree and lru_list. */ + __mmu_int_rb_remove(node, root); + list_del_init(&node->list); + kref_put(&node->refcount, release_nolock); } spin_unlock_irqrestore(&handler->lock, flags); - if (added) - queue_work(handler->wq, &handler->del_work); - return 0; } -/* - * Call the remove function for the given handler and the list. This - * is expected to be called with a delete list extracted from handler. - * The caller should not be holding the handler lock. - */ -static void do_remove(struct mmu_rb_handler *handler, - struct list_head *del_list) -{ - struct mmu_rb_node *node; - - while (!list_empty(del_list)) { - node = list_first_entry(del_list, struct mmu_rb_node, list); - list_del(&node->list); - handler->ops->remove(handler->ops_arg, node); - } -} - /* * Work queue function to remove all nodes that have been queued up to * be removed. The key feature is that mm->mmap_lock is not being held @@ -312,11 +332,16 @@ static void handle_remove(struct work_struct *work) del_work); struct list_head del_list; unsigned long flags; + struct mmu_rb_node *node; /* remove anything that is queued to get removed */ spin_lock_irqsave(&handler->lock, flags); list_replace_init(&handler->del_list, &del_list); spin_unlock_irqrestore(&handler->lock, flags); - do_remove(handler, &del_list); + while (!list_empty(&del_list)) { + node = list_first_entry(&del_list, struct mmu_rb_node, list); + list_del(&node->list); + handler->ops->remove(handler->ops_arg, node); + } } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h index 0265d81c6206..be85537d2326 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.h +++ b/drivers/infiniband/hw/hfi1/mmu_rb.h @@ -57,6 +57,7 @@ struct mmu_rb_node { struct rb_node node; struct mmu_rb_handler *handler; struct list_head list; + struct kref refcount; }; /* @@ -92,6 +93,8 @@ int hfi1_mmu_rb_register(void *ops_arg, void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler); int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, struct mmu_rb_node *mnode); +void hfi1_mmu_rb_release(struct kref *refcount); + void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg); struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler, unsigned long addr, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 728bf122ee0a..2dc97de434a5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1635,7 +1635,20 @@ static inline void sdma_unmap_desc( struct hfi1_devdata *dd, struct sdma_desc *descp) { - system_descriptor_complete(dd, descp); + switch (sdma_mapping_type(descp)) { + case SDMA_MAP_SINGLE: + dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp), + sdma_mapping_len(descp), DMA_TO_DEVICE); + break; + case SDMA_MAP_PAGE: + dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp), + sdma_mapping_len(descp), DMA_TO_DEVICE); + break; + } + + if (descp->pinning_ctx && descp->ctx_put) + descp->ctx_put(descp->pinning_ctx); + descp->pinning_ctx = NULL; } /* @@ -3155,8 +3168,8 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx, /* Add descriptor for coalesce buffer */ tx->desc_limit = MAX_DESC; - return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, - addr, tx->tlen); + return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, + addr, tx->tlen, NULL, NULL, NULL); } return 1; @@ -3199,9 +3212,9 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) make_tx_sdma_desc( tx, SDMA_MAP_NONE, - NULL, dd->sdma_pad_phys, - sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1))); + sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)), + NULL, NULL, NULL); tx->num_desc++; _sdma_close_tx(dd, tx); return rval; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 5a372ca1f6ac..7611f09d78dc 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -635,9 +635,11 @@ static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d) static inline void make_tx_sdma_desc( struct sdma_txreq *tx, int type, - void *pinning_ctx, dma_addr_t addr, - size_t len) + size_t len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { struct sdma_desc *desc = &tx->descp[tx->num_desc]; @@ -654,7 +656,11 @@ static inline void make_tx_sdma_desc( << SDMA_DESC0_PHY_ADDR_SHIFT) | (((u64)len & SDMA_DESC0_BYTE_COUNT_MASK) << SDMA_DESC0_BYTE_COUNT_SHIFT); + desc->pinning_ctx = pinning_ctx; + desc->ctx_put = ctx_put; + if (pinning_ctx && ctx_get) + ctx_get(pinning_ctx); } /* helper to extend txreq */ @@ -686,18 +692,20 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd, static inline int _sdma_txadd_daddr( struct hfi1_devdata *dd, int type, - void *pinning_ctx, struct sdma_txreq *tx, dma_addr_t addr, - u16 len) + u16 len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { int rval = 0; make_tx_sdma_desc( tx, type, - pinning_ctx, - addr, len); + addr, len, + pinning_ctx, ctx_get, ctx_put); WARN_ON(len > tx->tlen); tx->num_desc++; tx->tlen -= len; @@ -717,11 +725,18 @@ static inline int _sdma_txadd_daddr( /** * sdma_txadd_page() - add a page to the sdma_txreq * @dd: the device to use for mapping - * @pinning_ctx: context to be released at descriptor retirement * @tx: tx request to which the page is added * @page: page to map * @offset: offset within the page * @len: length in bytes + * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not + * added if coalesce buffer is used. E.g. pointer to pinned-page + * cache entry for the sdma_desc. + * @ctx_get: optional function to take reference to @pinning_ctx. Not called if + * @pinning_ctx is NULL. + * @ctx_put: optional function to release reference to @pinning_ctx after + * sdma_desc completes. May be called in interrupt context so must + * not sleep. Not called if @pinning_ctx is NULL. * * This is used to add a page/offset/length descriptor. * @@ -733,11 +748,13 @@ static inline int _sdma_txadd_daddr( */ static inline int sdma_txadd_page( struct hfi1_devdata *dd, - void *pinning_ctx, struct sdma_txreq *tx, struct page *page, unsigned long offset, - u16 len) + u16 len, + void *pinning_ctx, + void (*ctx_get)(void *), + void (*ctx_put)(void *)) { dma_addr_t addr; int rval; @@ -761,7 +778,8 @@ static inline int sdma_txadd_page( return -ENOSPC; } - return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, pinning_ctx, tx, addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len, + pinning_ctx, ctx_get, ctx_put); } /** @@ -795,8 +813,8 @@ static inline int sdma_txadd_daddr( return rval; } - return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, NULL, tx, - addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len, + NULL, NULL, NULL); } /** @@ -842,7 +860,8 @@ static inline int sdma_txadd_kvaddr( return -ENOSPC; } - return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, addr, len); + return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len, + NULL, NULL, NULL); } struct iowait_work; @@ -1093,6 +1112,4 @@ u16 sdma_get_descq_cnt(void); extern uint mod_num_sdma; void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid); - -void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp); #endif diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h index 4204650cebc2..fb091b5834b5 100644 --- a/drivers/infiniband/hw/hfi1/sdma_txreq.h +++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h @@ -62,6 +62,8 @@ struct sdma_desc { /* private: don't use directly */ u64 qw[2]; void *pinning_ctx; + /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */ + void (*ctx_put)(void *ctx); }; /** diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 3f49633bf985..a67791187d46 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -103,18 +103,14 @@ static int defer_packet_queue( static void activate_packet_queue(struct iowait *wait, int reason); static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len); -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode); static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, bool *stop); static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode); static struct mmu_rb_ops sdma_rb_ops = { .filter = sdma_rb_filter, - .insert = sdma_rb_insert, .evict = sdma_rb_evict, .remove = sdma_rb_remove, - .invalidate = sdma_rb_invalidate }; static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, @@ -288,14 +284,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, spin_unlock(&fd->pq_rcu_lock); synchronize_srcu(&fd->pq_srcu); /* at this point there can be no more new requests */ - if (pq->handler) - hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); /* Wait until all requests have been freed. */ wait_event_interruptible( pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); @@ -1316,25 +1312,17 @@ static void free_system_node(struct sdma_mmu_node *node) kfree(node); } -static inline void acquire_node(struct sdma_mmu_node *node) -{ - atomic_inc(&node->refcount); - WARN_ON(atomic_read(&node->refcount) < 0); -} - -static inline void release_node(struct mmu_rb_handler *handler, - struct sdma_mmu_node *node) -{ - atomic_dec(&node->refcount); - WARN_ON(atomic_read(&node->refcount) < 0); -} - +/* + * kref_get()'s an additional kref on the returned rb_node to prevent rb_node + * from being released until after rb_node is assigned to an SDMA descriptor + * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the + * virtual address range for rb_node is invalidated between now and then. + */ static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, unsigned long start, unsigned long end) { struct mmu_rb_node *rb_node; - struct sdma_mmu_node *node; unsigned long flags; spin_lock_irqsave(&handler->lock, flags); @@ -1343,11 +1331,12 @@ static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, spin_unlock_irqrestore(&handler->lock, flags); return NULL; } - node = container_of(rb_node, struct sdma_mmu_node, rb); - acquire_node(node); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&rb_node->refcount); spin_unlock_irqrestore(&handler->lock, flags); - return node; + return container_of(rb_node, struct sdma_mmu_node, rb); } static int pin_system_pages(struct user_sdma_request *req, @@ -1396,6 +1385,13 @@ static int pin_system_pages(struct user_sdma_request *req, return 0; } +/* + * kref refcount on *node_p will be 2 on successful addition: one kref from + * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being + * released until after *node_p is assigned to an SDMA descriptor (struct + * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual + * address range for *node_p is invalidated between now and then. + */ static int add_system_pinning(struct user_sdma_request *req, struct sdma_mmu_node **node_p, unsigned long start, unsigned long len) @@ -1409,6 +1405,12 @@ static int add_system_pinning(struct user_sdma_request *req, if (!node) return -ENOMEM; + /* First kref "moves" to mmu_rb_handler */ + kref_init(&node->rb.refcount); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&node->rb.refcount); + node->pq = pq; ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); if (ret == 0) { @@ -1472,15 +1474,15 @@ static int get_system_cache_entry(struct user_sdma_request *req, return 0; } - SDMA_DBG(req, "prepend: node->rb.addr %lx, node->refcount %d", - node->rb.addr, atomic_read(&node->refcount)); + SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", + node->rb.addr, kref_read(&node->rb.refcount)); prepend_len = node->rb.addr - start; /* * This node will not be returned, instead a new node * will be. So release the reference. */ - release_node(handler, node); + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); /* Prepend a node to cover the beginning of the allocation */ ret = add_system_pinning(req, node_p, start, prepend_len); @@ -1492,6 +1494,20 @@ static int get_system_cache_entry(struct user_sdma_request *req, } } +static void sdma_mmu_rb_node_get(void *ctx) +{ + struct mmu_rb_node *node = ctx; + + kref_get(&node->refcount); +} + +static void sdma_mmu_rb_node_put(void *ctx) +{ + struct sdma_mmu_node *node = ctx; + + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); +} + static int add_mapping_to_sdma_packet(struct user_sdma_request *req, struct user_sdma_txreq *tx, struct sdma_mmu_node *cache_entry, @@ -1535,9 +1551,12 @@ static int add_mapping_to_sdma_packet(struct user_sdma_request *req, ctx = cache_entry; } - ret = sdma_txadd_page(pq->dd, ctx, &tx->txreq, + ret = sdma_txadd_page(pq->dd, &tx->txreq, cache_entry->pages[page_index], - page_offset, from_this_page); + page_offset, from_this_page, + ctx, + sdma_mmu_rb_node_get, + sdma_mmu_rb_node_put); if (ret) { /* * When there's a failure, the entire request is freed by @@ -1559,8 +1578,6 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, struct user_sdma_iovec *iovec, size_t from_this_iovec) { - struct mmu_rb_handler *handler = req->pq->handler; - while (from_this_iovec > 0) { struct sdma_mmu_node *cache_entry; size_t from_this_cache_entry; @@ -1581,15 +1598,15 @@ static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, from_this_cache_entry); + + /* + * Done adding cache_entry to zero or more sdma_desc. Can + * kref_put() the "safety" kref taken under + * get_system_cache_entry(). + */ + kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); + if (ret) { - /* - * We're guaranteed that there will be no descriptor - * completion callback that releases this node - * because only the last descriptor referencing it - * has a context attached, and a failure means the - * last descriptor was never added. - */ - release_node(handler, cache_entry); SDMA_DBG(req, "add system segment failed %d", ret); return ret; } @@ -1640,42 +1657,12 @@ static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, return 0; } -void system_descriptor_complete(struct hfi1_devdata *dd, - struct sdma_desc *descp) -{ - switch (sdma_mapping_type(descp)) { - case SDMA_MAP_SINGLE: - dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp), - sdma_mapping_len(descp), DMA_TO_DEVICE); - break; - case SDMA_MAP_PAGE: - dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp), - sdma_mapping_len(descp), DMA_TO_DEVICE); - break; - } - - if (descp->pinning_ctx) { - struct sdma_mmu_node *node = descp->pinning_ctx; - - release_node(node->rb.handler, node); - } -} - static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, unsigned long len) { return (bool)(node->addr == addr); } -static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - - atomic_inc(&node->refcount); - return 0; -} - /* * Return 1 to remove the node from the rb tree and call the remove op. * @@ -1688,10 +1675,6 @@ static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, container_of(mnode, struct sdma_mmu_node, rb); struct evict_data *evict_data = evict_arg; - /* is this node still being used? */ - if (atomic_read(&node->refcount)) - return 0; /* keep this node */ - /* this node will be evicted, add its pages to our count */ evict_data->cleared += node->npages; @@ -1709,13 +1692,3 @@ static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) free_system_node(node); } - -static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - - if (!atomic_read(&node->refcount)) - return 1; - return 0; -} diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9d417aacfa8b..b2b26b71fcef 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -145,7 +145,6 @@ struct hfi1_user_sdma_comp_q { struct sdma_mmu_node { struct mmu_rb_node rb; struct hfi1_user_sdma_pkt_q *pq; - atomic_t refcount; struct page **pages; unsigned int npages; }; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 7658c620a125..ab8bcdf10447 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -106,11 +106,11 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, /* combine physically continuous fragments later? */ ret = sdma_txadd_page(sde->dd, - NULL, &tx->txreq, skb_frag_page(frag), skb_frag_off(frag), - skb_frag_size(frag)); + skb_frag_size(frag), + NULL, NULL, NULL); if (unlikely(ret)) goto bail_txadd; } From cc1b04b699e63a99e6790bef262651cba1cf029e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:02 -0300 Subject: [PATCH 136/509] RDMA: Remove uverbs_ex_cmd_mask values that are linked to functions [ Upstream commit b8e3130dd96b7b2d6d92e62dcd1515af30212fe2 ] Since a while now the uverbs layer checks if the driver implements a function before allowing the ucmd to proceed. This largely obsoletes the cmd_mask stuff, but there is some tricky bits in drivers preventing it from being removed. Remove the easy elements of uverbs_ex_cmd_mask by pre-setting them in the core code. These are triggered soley based on the related ops function pointer. query_device_ex is not triggered based on an op, but all drivers already implement something compatible with the extension, so enable it globally too. Link: https://lore.kernel.org/r/2-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Stable-dep-of: cf5b608fb0e3 ("RDMA/hns: Fix hns_roce_table_get return value") Signed-off-by: Sasha Levin --- drivers/infiniband/core/device.c | 11 +++++++++++ drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/hw/efa/efa_main.c | 3 --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 7 ------- drivers/infiniband/hw/hns/hns_roce_main.c | 2 -- drivers/infiniband/hw/mlx4/main.c | 14 +------------- drivers/infiniband/hw/mlx5/main.c | 14 ++------------ 7 files changed, 15 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5b7abcf102fe..3c29fd04b301 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -600,6 +600,17 @@ struct ib_device *_ib_alloc_device(size_t size) init_completion(&device->unreg_completion); INIT_WORK(&device->unregistration_work, ib_unregister_work); + device->uverbs_ex_cmd_mask = + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + BIT_ULL(IB_USER_VERBS_EX_CMD_CREATE_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL) | + BIT_ULL(IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + BIT_ULL(IB_USER_VERBS_EX_CMD_QUERY_DEVICE); + return device; } EXPORT_SYMBOL(_ib_alloc_device); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 09cf470c08d6..158f9eadc4e9 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3778,7 +3778,7 @@ const struct uapi_definition uverbs_def_write_intf[] = { IB_USER_VERBS_EX_CMD_MODIFY_CQ, ib_uverbs_ex_modify_cq, UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq), - UAPI_DEF_METHOD_NEEDS_FN(create_cq))), + UAPI_DEF_METHOD_NEEDS_FN(modify_cq))), DECLARE_UVERBS_OBJECT( UVERBS_OBJECT_DEVICE, diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index ffdd18f4217f..cd41cd114ab6 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -326,9 +326,6 @@ static int efa_ib_device_add(struct efa_dev *dev) (1ull << IB_USER_VERBS_CMD_CREATE_AH) | (1ull << IB_USER_VERBS_CMD_DESTROY_AH); - dev->ibdev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE); - ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5f4d8a32ed6d..b3d5ba8ef439 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2062,11 +2062,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); } -static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) -{ - return -EOPNOTSUPP; -} - static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { @@ -4347,7 +4342,6 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) static const struct ib_device_ops hns_roce_v1_dev_ops = { .destroy_qp = hns_roce_v1_destroy_qp, - .modify_cq = hns_roce_v1_modify_cq, .poll_cq = hns_roce_v1_poll_cq, .post_recv = hns_roce_v1_post_recv, .post_send = hns_roce_v1_post_send, @@ -4367,7 +4361,6 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, - .modify_cq = hns_roce_v1_modify_cq, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1e8b3e4ef1b1..8cc2dae269af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -511,8 +511,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); - ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 05c7200751e5..c62cdd645696 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2682,8 +2682,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); @@ -2691,15 +2689,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == - IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + IB_LINK_LAYER_ETHERNET))) ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); - } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { @@ -2718,9 +2709,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 39ba7005f2c4..215d6618839b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4180,14 +4180,10 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - dev->ib_dev.uverbs_ex_cmd_mask = - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) @@ -4290,12 +4286,6 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; From de1049dd18bd7a2566f253fdb2e9a2e7d12f0b76 Mon Sep 17 00:00:00 2001 From: Lang Cheng Date: Fri, 11 Dec 2020 09:37:33 +0800 Subject: [PATCH 137/509] RDMA/hns: Fix coding style issues [ Upstream commit dc93a0d987fcfe93b132871e72d4ea5aff36dd5c ] Just format the code without modifying anything, including fixing some redundant and missing blanks and spaces and changing the variable definition order. Link: https://lore.kernel.org/r/1607650657-35992-8-git-send-email-liweihang@huawei.com Signed-off-by: Lang Cheng Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Stable-dep-of: cf5b608fb0e3 ("RDMA/hns: Fix hns_roce_table_get return value") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 27 +++++++++++----------- drivers/infiniband/hw/hns/hns_roce_cmd.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_cq.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 20 ++++++++-------- drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 9 +++----- drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 6 ++--- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++--- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - 13 files changed, 43 insertions(+), 51 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 455d533dd7c4..c493d7644b57 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -36,9 +36,9 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" -#define CMD_POLL_TOKEN 0xffff -#define CMD_MAX_NUM 32 -#define CMD_TOKEN_MASK 0x1f +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define CMD_TOKEN_MASK 0x1f static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, @@ -93,8 +93,8 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, u64 out_param) { - struct hns_roce_cmd_context - *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + struct hns_roce_cmd_context *context = + &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds]; if (token != context->token) return; @@ -164,8 +164,8 @@ static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); up(&hr_dev->cmd.event_sem); return ret; @@ -231,9 +231,8 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; int i; - hr_cmd->context = kmalloc_array(hr_cmd->max_cmds, - sizeof(*hr_cmd->context), - GFP_KERNEL); + hr_cmd->context = + kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); if (!hr_cmd->context) return -ENOMEM; @@ -262,8 +261,8 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) hr_cmd->use_events = 0; } -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) { struct hns_roce_cmd_mailbox *mailbox; @@ -271,8 +270,8 @@ struct hns_roce_cmd_mailbox if (!mailbox) return ERR_PTR(-ENOMEM); - mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, - &mailbox->dma); + mailbox->buf = + dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma); if (!mailbox->buf) { kfree(mailbox); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 1915bacaded0..8e63b827f28c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -143,8 +143,8 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout); -struct hns_roce_cmd_mailbox - *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +struct hns_roce_cmd_mailbox * +hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8a6bded9c11c..9200e6477e1e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -41,9 +41,9 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_cq_table *cq_table; - struct ib_device *ibdev = &hr_dev->ib_dev; u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; int ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c880a8be7e3c..edc287a0a91a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -198,9 +198,9 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, { struct device *dev = hr_dev->dev; u32 chunk_ba_num; + u32 chunk_size; u32 table_idx; u32 bt_num; - u32 chunk_size; if (get_hem_table_config(hr_dev, mhop, table->type)) return -EINVAL; @@ -332,15 +332,15 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = hr_dev->dev; - long end; - unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; + unsigned long flags; __le32 bt_cmd_l; - u64 bt_ba; int ret = 0; + u64 bt_ba; + long end; /* Find the HEM(Hardware Entry Memory) entry */ unsigned long i = (obj & (table->num_obj - 1)) / @@ -640,8 +640,8 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj) { struct device *dev = hr_dev->dev; - int ret = 0; unsigned long i; + int ret = 0; if (hns_roce_check_whether_mhop(hr_dev, table->type)) return hns_roce_table_mhop_get(hr_dev, table, obj); @@ -789,14 +789,14 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, struct hns_roce_hem_chunk *chunk; struct hns_roce_hem_mhop mhop; struct hns_roce_hem *hem; - void *addr = NULL; unsigned long mhop_obj = obj; unsigned long obj_per_chunk; unsigned long idx_offset; int offset, dma_offset; + void *addr = NULL; + u32 hem_idx = 0; int length; int i, j; - u32 hem_idx = 0; if (!table->lowmem) return NULL; @@ -966,8 +966,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; u32 buf_chunk_size; - int i; u64 obj; + int i; if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) return; @@ -1298,8 +1298,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1404,8 +1404,8 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret; int unit; + int ret; int i; if (region_cnt > HNS_ROCE_MAX_BT_REGION) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index b34c940077bb..112243d112c2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -174,4 +174,4 @@ static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) return sg_dma_address(&iter->chunk->mem[iter->page_idx]); } -#endif /*_HNS_ROCE_HEM_H*/ +#endif /* _HNS_ROCE_HEM_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index b3d5ba8ef439..cec705b58a84 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -239,7 +239,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, break; } - /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* Ctrl field, ctrl set type: sig, solic, imm, fence */ /* SO wait for conforming application scenarios */ ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | @@ -300,7 +300,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE); } else { - /*sqe num is two */ + /* sqe num is two */ for (i = 0; i < wr->num_sge; i++) set_data_seg(dseg + i, wr->sg_list + i); @@ -1165,7 +1165,7 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) } raq->e_raq_buf->map = addr; - /* Configure raq extended address. 48bit 4K align*/ + /* Configure raq extended address. 48bit 4K align */ roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); /* Configure raq_shift */ @@ -2760,7 +2760,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -3793,7 +3792,6 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, int event_type; while ((aeqe = next_aeqe_sw_v1(eq))) { - /* Make sure we read the AEQ entry after we have checked the * ownership bit */ @@ -3898,7 +3896,6 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, u32 cqn; while ((ceqe = next_ceqe_sw_v1(eq))) { - /* Make sure we read CEQ entry after we have checked the * ownership bit */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index ffd0156080f5..46ab0a321d21 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -419,7 +419,7 @@ struct hns_roce_wqe_data_seg { struct hns_roce_wqe_raddr_seg { __le32 rkey; - __le32 len;/* reserved */ + __le32 len; /* reserved */ __le64 raddr; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 76ed547b76ea..322f341f4145 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1028,8 +1028,8 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - unsigned long instance_stage; /* the current instance stage */ - unsigned long reset_stage; /* the current reset stage */ + unsigned long instance_stage; /* the current instance stage */ + unsigned long reset_stage; /* the current reset stage */ unsigned long reset_cnt; bool sw_resetting; bool hw_resetting; @@ -2434,7 +2434,6 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, if (i < (pg_num - 1)) entry[i].blk_ba1_nxt_ptr |= (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; - } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -5540,16 +5539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: hns_roce_cq_event(hr_dev, cqn, event_type); break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, le16_to_cpu(aeqe->event.cmd.token), aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: - break; case HNS_ROCE_EVENT_TYPE_FLR: break; default: diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8a92faeb3d23..8948d2b5577d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -440,7 +440,7 @@ struct hns_roce_srq_context { #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 #define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) -enum{ +enum { V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, }; @@ -1076,9 +1076,9 @@ struct hns_roce_v2_ud_send_wqe { __le32 dmac; __le32 byte_48; u8 dgid[GID_LEN_V2]; - }; -#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 + +#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) #define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 8cc2dae269af..90cbd15f6441 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -582,8 +582,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, @@ -723,8 +723,8 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) */ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; spin_lock_init(&hr_dev->sm_lock); spin_lock_init(&hr_dev->bt_cmd_lock); @@ -847,8 +847,8 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev) int hns_roce_init(struct hns_roce_dev *hr_dev) { - int ret; struct device *dev = hr_dev->dev; + int ret; if (hr_dev->hw->reset) { ret = hr_dev->hw->reset(hr_dev, true); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1c342a7bd7df..d5b3b10e0a80 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -167,10 +167,10 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { - int ret; unsigned long mtpt_idx = key_to_hw_index(mr->key); - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6fe98af7741b..c42c6761382d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -114,8 +114,8 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, enum hns_roce_event type) { - struct ib_event event; struct ib_qp *ibqp = &hr_qp->ibqp; + struct ib_event event; if (ibqp->event_handler) { event.device = ibqp->device; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 08df97e0a665..02e2416b5fed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -245,7 +245,6 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, err = -ENOMEM; goto err_idx_mtr; } - } return 0; From aa495b927f9ca9669cbba899ad6fb694c91f4814 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Fri, 21 May 2021 17:29:54 +0800 Subject: [PATCH 138/509] RDMA/hns: Use refcount_t APIs for HEM [ Upstream commit 82eb481da64586ccd287b2b2c5a086202c65e7eb ] refcount_t is better than integer for reference counting, it will WARN on overflow/underflow and avoid use-after-free risks. Link: https://lore.kernel.org/r/1621589395-2435-5-git-send-email-liweihang@huawei.com Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Stable-dep-of: cf5b608fb0e3 ("RDMA/hns: Fix hns_roce_table_get return value") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.c | 32 +++++++++++------------- drivers/infiniband/hw/hns/hns_roce_hem.h | 4 +-- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc287a0a91a..831e9476c628 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -260,7 +260,6 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, if (!hem) return NULL; - hem->refcount = 0; INIT_LIST_HEAD(&hem->chunk_list); order = get_order(hem_alloc_size); @@ -607,7 +606,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, mutex_lock(&table->mutex); if (table->hem[index.buf]) { - ++table->hem[index.buf]->refcount; + refcount_inc(&table->hem[index.buf]->refcount); goto out; } @@ -626,7 +625,7 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, } } - ++table->hem[index.buf]->refcount; + refcount_set(&table->hem[index.buf]->refcount, 1); goto out; err_alloc: @@ -652,7 +651,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, mutex_lock(&table->mutex); if (table->hem[i]) { - ++table->hem[i]->refcount; + refcount_inc(&table->hem[i]->refcount); goto out; } @@ -675,7 +674,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, goto out; } - ++table->hem[i]->refcount; + refcount_set(&table->hem[i]->refcount, 1); out: mutex_unlock(&table->mutex); return ret; @@ -742,11 +741,11 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, return; } - mutex_lock(&table->mutex); - if (check_refcount && (--table->hem[index.buf]->refcount > 0)) { - mutex_unlock(&table->mutex); + if (!check_refcount) + mutex_lock(&table->mutex); + else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount, + &table->mutex)) return; - } clear_mhop_hem(hr_dev, table, obj, &mhop, &index); free_mhop_hem(hr_dev, table, &mhop, &index); @@ -768,16 +767,15 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, i = (obj & (table->num_obj - 1)) / (table->table_chunk_size / table->obj_size); - mutex_lock(&table->mutex); + if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount, + &table->mutex)) + return; - if (--table->hem[i]->refcount == 0) { - /* Clear HEM base address */ - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - dev_warn(dev, "Clear HEM base address failed.\n"); + if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) + dev_warn(dev, "failed to clear HEM base address.\n"); - hns_roce_free_hem(hr_dev, table->hem[i]); - table->hem[i] = NULL; - } + hns_roce_free_hem(hr_dev, table->hem[i]); + table->hem[i] = NULL; mutex_unlock(&table->mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 112243d112c2..03d44e2efa47 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -87,8 +87,8 @@ struct hns_roce_hem_chunk { }; struct hns_roce_hem { - struct list_head chunk_list; - int refcount; + struct list_head chunk_list; + refcount_t refcount; }; struct hns_roce_hem_iter { From 9196f44239cf9b0d706ec68c1b54990a7b86d193 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 21 May 2021 17:29:55 +0800 Subject: [PATCH 139/509] RDMA/hns: Clean the hardware related code for HEM [ Upstream commit 68e11a6086b10e1a88d2b2c8432299f595db748d ] Move the HIP06 related code to the hw v1 source file for HEM. Link: https://lore.kernel.org/r/1621589395-2435-6-git-send-email-liweihang@huawei.com Signed-off-by: Xi Wang Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe Stable-dep-of: cf5b608fb0e3 ("RDMA/hns: Fix hns_roce_table_get return value") Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 - drivers/infiniband/hw/hns/hns_roce_hem.c | 82 +-------------------- drivers/infiniband/hw/hns/hns_roce_hem.h | 9 +-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 77 +++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 5 ++ 5 files changed, 85 insertions(+), 90 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d9aa7424d290..09b5e4935c2c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -46,8 +46,6 @@ #define HNS_ROCE_IB_MIN_SQ_STRIDE 6 -#define HNS_ROCE_BA_SIZE (32 * 4096) - #define BA_BYTE_LEN 8 /* Hardware specification only for v1 engine */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 831e9476c628..3c3187f22216 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -36,9 +36,6 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" -#define DMA_ADDR_T_SHIFT 12 -#define BT_BA_SHIFT 32 - #define HEM_INDEX_BUF BIT(0) #define HEM_INDEX_L0 BIT(1) #define HEM_INDEX_L1 BIT(2) @@ -326,81 +323,6 @@ void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) kfree(hem); } -static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, unsigned long obj) -{ - spinlock_t *lock = &hr_dev->bt_cmd_lock; - struct device *dev = hr_dev->dev; - struct hns_roce_hem_iter iter; - void __iomem *bt_cmd; - __le32 bt_cmd_val[2]; - __le32 bt_cmd_h = 0; - unsigned long flags; - __le32 bt_cmd_l; - int ret = 0; - u64 bt_ba; - long end; - - /* Find the HEM(Hardware Entry Memory) entry */ - unsigned long i = (obj & (table->num_obj - 1)) / - (table->table_chunk_size / table->obj_size); - - switch (table->type) { - case HEM_TYPE_QPC: - case HEM_TYPE_MTPT: - case HEM_TYPE_CQC: - case HEM_TYPE_SRQC: - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); - break; - default: - return ret; - } - - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); - roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); - - /* Currently iter only a chunk */ - for (hns_roce_hem_first(table->hem[i], &iter); - !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { - bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT; - - spin_lock_irqsave(lock, flags); - - bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - - end = HW_SYNC_TIMEOUT_MSECS; - while (end > 0) { - if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) - break; - - mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); - end -= HW_SYNC_SLEEP_TIME_INTERVAL; - } - - if (end <= 0) { - dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); - spin_unlock_irqrestore(lock, flags); - return -EBUSY; - } - - bt_cmd_l = cpu_to_le32(bt_ba); - roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, - bt_ba >> BT_BA_SHIFT); - - bt_cmd_val[0] = bt_cmd_l; - bt_cmd_val[1] = bt_cmd_h; - hns_roce_write64_k(bt_cmd_val, - hr_dev->reg_base + ROCEE_BT_CMD_L_REG); - spin_unlock_irqrestore(lock, flags); - } - - return ret; -} - static int calc_hem_config(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, unsigned long obj, struct hns_roce_hem_mhop *mhop, @@ -666,7 +588,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } /* Set HEM base address(128K/page, pa) to Hardware */ - if (hns_roce_set_hem(hr_dev, table, obj)) { + if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) { hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; ret = -ENODEV; @@ -771,7 +693,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev, &table->mutex)) return; - if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) + if (hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) dev_warn(dev, "failed to clear HEM base address.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 03d44e2efa47..b7617786b100 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -34,9 +34,7 @@ #ifndef _HNS_ROCE_HEM_H #define _HNS_ROCE_HEM_H -#define HW_SYNC_SLEEP_TIME_INTERVAL 20 -#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) -#define BT_CMD_SYNC_SHIFT 31 +#define HEM_HOP_STEP_DIRECT 0xff enum { /* MAP HEM(Hardware Entry Memory) */ @@ -73,11 +71,6 @@ enum { (type >= HEM_TYPE_MTT && hop_num == 1) || \ (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0)) -enum { - HNS_ROCE_HEM_PAGE_SHIFT = 12, - HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT, -}; - struct hns_roce_hem_chunk { struct list_head list; int npages; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index cec705b58a84..6f9b024d4ff7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -450,6 +450,82 @@ static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); } +static int hns_roce_v1_set_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj, + int step_idx) +{ + spinlock_t *lock = &hr_dev->bt_cmd_lock; + struct device *dev = hr_dev->dev; + struct hns_roce_hem_iter iter; + void __iomem *bt_cmd; + __le32 bt_cmd_val[2]; + __le32 bt_cmd_h = 0; + unsigned long flags; + __le32 bt_cmd_l; + int ret = 0; + u64 bt_ba; + long end; + + /* Find the HEM(Hardware Entry Memory) entry */ + unsigned long i = (obj & (table->num_obj - 1)) / + (table->table_chunk_size / table->obj_size); + + switch (table->type) { + case HEM_TYPE_QPC: + case HEM_TYPE_MTPT: + case HEM_TYPE_CQC: + case HEM_TYPE_SRQC: + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); + break; + default: + return ret; + } + + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + + /* Currently iter only a chunk */ + for (hns_roce_hem_first(table->hem[i], &iter); + !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { + bt_ba = hns_roce_hem_addr(&iter) >> HNS_HW_PAGE_SHIFT; + + spin_lock_irqsave(lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = HW_SYNC_TIMEOUT_MSECS; + while (end > 0) { + if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) + break; + + mdelay(HW_SYNC_SLEEP_TIME_INTERVAL); + end -= HW_SYNC_SLEEP_TIME_INTERVAL; + } + + if (end <= 0) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(lock, flags); + return -EBUSY; + } + + bt_cmd_l = cpu_to_le32(bt_ba); + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, + upper_32_bits(bt_ba)); + + bt_cmd_val[0] = bt_cmd_l; + bt_cmd_val[1] = bt_cmd_h; + hns_roce_write64_k(bt_cmd_val, + hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + spin_unlock_irqrestore(lock, flags); + } + + return ret; +} + static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, u32 odb_mode) { @@ -4358,6 +4434,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .set_mtu = hns_roce_v1_set_mtu, .write_mtpt = hns_roce_v1_write_mtpt, .write_cqc = hns_roce_v1_write_cqc, + .set_hem = hns_roce_v1_set_hem, .clear_hem = hns_roce_v1_clear_hem, .modify_qp = hns_roce_v1_modify_qp, .query_qp = hns_roce_v1_query_qp, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 46ab0a321d21..9ff1a41ddec3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -1042,6 +1042,11 @@ struct hns_roce_db_table { struct hns_roce_ext_db *ext_db; }; +#define HW_SYNC_SLEEP_TIME_INTERVAL 20 +#define HW_SYNC_TIMEOUT_MSECS (25 * HW_SYNC_SLEEP_TIME_INTERVAL) +#define BT_CMD_SYNC_SHIFT 31 +#define HNS_ROCE_BA_SIZE (32 * 4096) + struct hns_roce_bt_table { struct hns_roce_buf_list qpc_buf; struct hns_roce_buf_list mtpt_buf; From 220f86cc19dc1e30a9084c8f7264fa830a23b18b Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Tue, 23 May 2023 20:16:40 +0800 Subject: [PATCH 140/509] RDMA/hns: Fix hns_roce_table_get return value [ Upstream commit cf5b608fb0e369c473a8303cad6ddb386505e5b8 ] The return value of set_hem has been fixed to ENODEV, which will lead a diagnostic information missing. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20230523121641.3132102-3-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hns/hns_roce_hem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 3c3187f22216..854b41c14774 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -588,11 +588,12 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, } /* Set HEM base address(128K/page, pa) to Hardware */ - if (hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT)) { + ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT); + if (ret) { hns_roce_free_hem(hr_dev, table->hem[i]); table->hem[i] = NULL; - ret = -ENODEV; - dev_err(dev, "set HEM base address to HW failed.\n"); + dev_err(dev, "set HEM base address to HW failed, ret = %d.\n", + ret); goto out; } From 6817914c67b7965452310079ae2d00992d16dac2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 23 May 2023 17:35:16 +0200 Subject: [PATCH 141/509] ARM: dts: iwg20d-q7-common: Fix backlight pwm specifier [ Upstream commit 0501fdec106a291c43b3c1b525cf22ab4c24b2d8 ] make dtbs_check: arch/arm/boot/dts/renesas/r8a7743-iwg20d-q7.dtb: backlight: pwms: [[58, 0, 5000000], [0]] is too long From schema: Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml arch/arm/boot/dts/renesas/r8a7743-iwg20d-q7-dbcm-ca.dtb: backlight: pwms: [[67, 0, 5000000], [0]] is too long From schema: Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml arch/arm/boot/dts/renesas/r8a7744-iwg20d-q7-dbcm-ca.dtb: backlight: pwms: [[67, 0, 5000000], [0]] is too long From schema: Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml arch/arm/boot/dts/renesas/r8a7744-iwg20d-q7.dtb: backlight: pwms: [[58, 0, 5000000], [0]] is too long From schema: Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml PWM specifiers referring to R-Car PWM Timer Controllers should contain only two cells. Fix this by dropping the bogus third cell. Fixes: 6f89dd9e9325d05b ("ARM: dts: iwg20d-q7-common: Add LCD support") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/6e5c3167424a43faf8c1fa68d9667b3d87dc86d8.1684855911.git.geert+renesas@glider.be Signed-off-by: Sasha Levin --- arch/arm/boot/dts/iwg20d-q7-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/iwg20d-q7-common.dtsi b/arch/arm/boot/dts/iwg20d-q7-common.dtsi index 63cafd220dba..358f5477deef 100644 --- a/arch/arm/boot/dts/iwg20d-q7-common.dtsi +++ b/arch/arm/boot/dts/iwg20d-q7-common.dtsi @@ -49,7 +49,7 @@ audio_clock: audio_clock { lcd_backlight: backlight { compatible = "pwm-backlight"; - pwms = <&pwm3 0 5000000 0>; + pwms = <&pwm3 0 5000000>; brightness-levels = <0 4 8 16 32 64 128 255>; default-brightness-level = <7>; enable-gpios = <&gpio5 14 GPIO_ACTIVE_HIGH>; From 34e1e2f3cf5a956f9c94916906acb229fd9a7070 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 25 May 2023 10:48:22 +0200 Subject: [PATCH 142/509] arm64: dts: renesas: ulcb-kf: Remove flow control for SCIF1 [ Upstream commit 1a2c4e5635177939a088d22fa35c6a7032725663 ] The schematics are misleading, the flow control is for HSCIF1. We need SCIF1 for GNSS/GPS which does not use flow control. Fixes: c6c816e22bc8 ("arm64: dts: ulcb-kf: enable SCIF1") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230525084823.4195-2-wsa+renesas@sang-engineering.com Signed-off-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/renesas/ulcb-kf.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi index 05e64bfad023..24d0a1337ae1 100644 --- a/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb-kf.dtsi @@ -270,7 +270,7 @@ hscif0_pins: hscif0 { }; scif1_pins: scif1 { - groups = "scif1_data_b", "scif1_ctrl"; + groups = "scif1_data_b"; function = "scif1"; }; @@ -330,7 +330,6 @@ rsnd_for_pcm3168a_capture: endpoint { &scif1 { pinctrl-0 = <&scif1_pins>; pinctrl-names = "default"; - uart-has-rtscts; status = "okay"; }; From ce6e0434e502abdf966164b7c72523fb5fe54635 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 4 Jun 2023 17:42:28 +0200 Subject: [PATCH 143/509] fbdev: omapfb: lcd_mipid: Fix an error handling path in mipid_spi_probe() [ Upstream commit 79a3908d1ea6c35157a6d907b1a9d8ec06015e7a ] If 'mipid_detect()' fails, we must free 'md' to avoid a memory leak. Fixes: 66d2f99d0bb5 ("omapfb: add support for MIPI-DCS compatible LCDs") Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/omap/lcd_mipid.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/omap/lcd_mipid.c b/drivers/video/fbdev/omap/lcd_mipid.c index a75ae0c9b14c..d1cd8785d011 100644 --- a/drivers/video/fbdev/omap/lcd_mipid.c +++ b/drivers/video/fbdev/omap/lcd_mipid.c @@ -563,11 +563,15 @@ static int mipid_spi_probe(struct spi_device *spi) r = mipid_detect(md); if (r < 0) - return r; + goto free_md; omapfb_register_panel(&md->panel); return 0; + +free_md: + kfree(md); + return r; } static int mipid_spi_remove(struct spi_device *spi) From 17cd31487dc3089ccced8de1d26a1589d85c4201 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 19 Apr 2023 09:30:06 +0530 Subject: [PATCH 144/509] arm64: dts: ti: k3-j7200: Fix physical address of pin [ Upstream commit 3d011933000ed9054c649952d83162d24f020a93 ] wkup_pmx splits into multiple regions. Like wkup_pmx0 -> 13 pins (WKUP_PADCONFIG 0 - 12) wkup_pmx1 -> 2 pins (WKUP_PADCONFIG 14 - 15) wkup_pmx2 -> 59 pins (WKUP_PADCONFIG 26 - 84) wkup_pmx3 -> 8 pins (WKUP_PADCONFIG 93 - 100) With this split, pin offset needs to be adjusted to match with new pmx for all pins above wkup_pmx0. Example a pin under wkup_pmx1 should start from 0 instead of old offset(0x38 WKUP_PADCONFIG 14 offset) J7200 Datasheet (Table 6-106, Section 6.4 Pin Multiplexing) : https://www.ti.com/lit/ds/symlink/dra821u.pdf Fixes: 9ae21ac445e9 ("arm64: dts: ti: k3-j7200: Fix wakeup pinmux range") Signed-off-by: Keerthy Signed-off-by: Udit Kumar Link: https://lore.kernel.org/r/20230419040007.3022780-2-u-kumar1@ti.com Signed-off-by: Vignesh Raghavendra Signed-off-by: Sasha Levin --- .../dts/ti/k3-j7200-common-proc-board.dts | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts index 909ab6661aef..4ec5e955c33c 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j7200-common-proc-board.dts @@ -19,25 +19,25 @@ chosen { &wkup_pmx2 { mcu_cpsw_pins_default: mcu-cpsw-pins-default { pinctrl-single,pins = < - J721E_WKUP_IOPAD(0x0068, PIN_OUTPUT, 0) /* MCU_RGMII1_TX_CTL */ - J721E_WKUP_IOPAD(0x006c, PIN_INPUT, 0) /* MCU_RGMII1_RX_CTL */ - J721E_WKUP_IOPAD(0x0070, PIN_OUTPUT, 0) /* MCU_RGMII1_TD3 */ - J721E_WKUP_IOPAD(0x0074, PIN_OUTPUT, 0) /* MCU_RGMII1_TD2 */ - J721E_WKUP_IOPAD(0x0078, PIN_OUTPUT, 0) /* MCU_RGMII1_TD1 */ - J721E_WKUP_IOPAD(0x007c, PIN_OUTPUT, 0) /* MCU_RGMII1_TD0 */ - J721E_WKUP_IOPAD(0x0088, PIN_INPUT, 0) /* MCU_RGMII1_RD3 */ - J721E_WKUP_IOPAD(0x008c, PIN_INPUT, 0) /* MCU_RGMII1_RD2 */ - J721E_WKUP_IOPAD(0x0090, PIN_INPUT, 0) /* MCU_RGMII1_RD1 */ - J721E_WKUP_IOPAD(0x0094, PIN_INPUT, 0) /* MCU_RGMII1_RD0 */ - J721E_WKUP_IOPAD(0x0080, PIN_OUTPUT, 0) /* MCU_RGMII1_TXC */ - J721E_WKUP_IOPAD(0x0084, PIN_INPUT, 0) /* MCU_RGMII1_RXC */ + J721E_WKUP_IOPAD(0x0000, PIN_OUTPUT, 0) /* MCU_RGMII1_TX_CTL */ + J721E_WKUP_IOPAD(0x0004, PIN_INPUT, 0) /* MCU_RGMII1_RX_CTL */ + J721E_WKUP_IOPAD(0x0008, PIN_OUTPUT, 0) /* MCU_RGMII1_TD3 */ + J721E_WKUP_IOPAD(0x000c, PIN_OUTPUT, 0) /* MCU_RGMII1_TD2 */ + J721E_WKUP_IOPAD(0x0010, PIN_OUTPUT, 0) /* MCU_RGMII1_TD1 */ + J721E_WKUP_IOPAD(0x0014, PIN_OUTPUT, 0) /* MCU_RGMII1_TD0 */ + J721E_WKUP_IOPAD(0x0020, PIN_INPUT, 0) /* MCU_RGMII1_RD3 */ + J721E_WKUP_IOPAD(0x0024, PIN_INPUT, 0) /* MCU_RGMII1_RD2 */ + J721E_WKUP_IOPAD(0x0028, PIN_INPUT, 0) /* MCU_RGMII1_RD1 */ + J721E_WKUP_IOPAD(0x002c, PIN_INPUT, 0) /* MCU_RGMII1_RD0 */ + J721E_WKUP_IOPAD(0x0018, PIN_OUTPUT, 0) /* MCU_RGMII1_TXC */ + J721E_WKUP_IOPAD(0x001c, PIN_INPUT, 0) /* MCU_RGMII1_RXC */ >; }; mcu_mdio_pins_default: mcu-mdio1-pins-default { pinctrl-single,pins = < - J721E_WKUP_IOPAD(0x009c, PIN_OUTPUT, 0) /* (L1) MCU_MDIO0_MDC */ - J721E_WKUP_IOPAD(0x0098, PIN_INPUT, 0) /* (L4) MCU_MDIO0_MDIO */ + J721E_WKUP_IOPAD(0x0034, PIN_OUTPUT, 0) /* (L1) MCU_MDIO0_MDC */ + J721E_WKUP_IOPAD(0x0030, PIN_INPUT, 0) /* (L4) MCU_MDIO0_MDIO */ >; }; }; From badeb7fe2450ec9ed1bab0ed71302c0908055bcb Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 6 Jun 2023 20:01:12 +0200 Subject: [PATCH 145/509] ARM: dts: stm32: Fix audio routing on STM32MP15xx DHCOM PDK2 [ Upstream commit e3f2778b1b6ced649bffdc7cbb05b80bb92f2108 ] The audio routing flow is not correct, the flow should be from source (second element in the pair) to sink (first element in the pair). The flow now is from "HP_OUT" to "Playback", where "Playback" is source and "HP_OUT" is sink, i.e. the direction is swapped and there is no direct link between the two either. Fill in the correct routing, where "HP_OUT" supplies the "Headphone Jack", "Line In Jack" supplies "LINE_IN" input, "Microphone Jack" supplies "MIC_IN" input and "Mic Bias" supplies "Microphone Jack". Fixes: 34e0c7847dcf ("ARM: dts: stm32: Add DH Electronics DHCOM STM32MP1 SoM and PDK2 board") Signed-off-by: Marek Vasut Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index fd0cd10cb093..2c391065135e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -120,10 +120,13 @@ lcd_panel_in: endpoint { sound { compatible = "audio-graph-card"; - routing = - "MIC_IN", "Capture", - "Capture", "Mic Bias", - "Playback", "HP_OUT"; + widgets = "Headphone", "Headphone Jack", + "Line", "Line In Jack", + "Microphone", "Microphone Jack"; + routing = "Headphone Jack", "HP_OUT", + "LINE_IN", "Line In Jack", + "MIC_IN", "Microphone Jack", + "Microphone Jack", "Mic Bias"; dais = <&sai2a_port &sai2b_port>; status = "okay"; }; From 6e12311dcedd1125e0ed33eb29badf1fef65df5b Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 6 Jun 2023 13:56:04 +0200 Subject: [PATCH 146/509] ARM: dts: stm32: fix i2s endpoint format property for stm32mp15xx-dkx [ Upstream commit 076c74c592cabe4a47537fe5205b5b678bed010d ] Use "dai-format" to configure DAI audio format as specified in audio-graph-port.yaml bindings. Fixes: 144d1ba70548 ("ARM: dts: stm32: Adapt STM32MP157 DK boards to stm32 DT diversity") Signed-off-by: Olivier Moysan Signed-off-by: Alexandre Torgue Signed-off-by: Sasha Levin --- arch/arm/boot/dts/stm32mp15xx-dkx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index 47df8ac67cf1..75869d6a1ab2 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -406,7 +406,7 @@ &i2s2 { i2s2_port: port { i2s2_endpoint: endpoint { remote-endpoint = <&sii9022_tx_endpoint>; - format = "i2s"; + dai-format = "i2s"; mclk-fs = <256>; }; }; From 4610efa404be030e1d718347f7e021081717a1fb Mon Sep 17 00:00:00 2001 From: Tim Harvey Date: Tue, 6 Jun 2023 08:30:04 -0700 Subject: [PATCH 147/509] hwmon: (gsc-hwmon) fix fan pwm temperature scaling [ Upstream commit a6d80df47ee2c69db99e4f2f8871aa4db154620b ] The GSC fan pwm temperature register is in centidegrees celcius but the Linux hwmon convention is to use milidegrees celcius. Fix the scaling. Fixes: 3bce5377ef66 ("hwmon: Add Gateworks System Controller support") Signed-off-by: Tim Harvey Link: https://lore.kernel.org/r/20230606153004.1448086-1-tharvey@gateworks.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/gsc-hwmon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/gsc-hwmon.c b/drivers/hwmon/gsc-hwmon.c index f29ce49294da..89d036bf88df 100644 --- a/drivers/hwmon/gsc-hwmon.c +++ b/drivers/hwmon/gsc-hwmon.c @@ -82,8 +82,8 @@ static ssize_t pwm_auto_point_temp_store(struct device *dev, if (kstrtol(buf, 10, &temp)) return -EINVAL; - temp = clamp_val(temp, 0, 10000); - temp = DIV_ROUND_CLOSEST(temp, 10); + temp = clamp_val(temp, 0, 100000); + temp = DIV_ROUND_CLOSEST(temp, 100); regs[0] = temp & 0xff; regs[1] = (temp >> 8) & 0xff; @@ -100,7 +100,7 @@ static ssize_t pwm_auto_point_pwm_show(struct device *dev, { struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); - return sprintf(buf, "%d\n", 255 * (50 + (attr->index * 10)) / 100); + return sprintf(buf, "%d\n", 255 * (50 + (attr->index * 10))); } static SENSOR_DEVICE_ATTR_RO(pwm1_auto_point1_pwm, pwm_auto_point_pwm, 0); From a3c5d148b78b2f8bbade9396615676cb33640882 Mon Sep 17 00:00:00 2001 From: Chu Lin Date: Wed, 12 May 2021 17:10:43 +0000 Subject: [PATCH 148/509] hwmon: (adm1275) enable adm1272 temperature reporting [ Upstream commit 9da9c2dc57b2fa2e65521894cb66df4bf615214d ] adm1272 supports temperature reporting but it is disabled by default. Tested: ls temp1_* temp1_crit temp1_highest temp1_max temp1_crit_alarm temp1_input temp1_max_alarm cat temp1_input 26642 Signed-off-by: Chu Lin Link: https://lore.kernel.org/r/20210512171043.2433694-1-linchuyuan@google.com [groeck: Updated subject to reflect correct driver] Signed-off-by: Guenter Roeck Stable-dep-of: b153a0bb4199 ("hwmon: (pmbus/adm1275) Fix problems with temperature monitoring on ADM1272") Signed-off-by: Sasha Levin --- drivers/hwmon/pmbus/adm1275.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c index e7997f37b266..0be1b5777d2f 100644 --- a/drivers/hwmon/pmbus/adm1275.c +++ b/drivers/hwmon/pmbus/adm1275.c @@ -611,11 +611,13 @@ static int adm1275_probe(struct i2c_client *client) tindex = 8; info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT | - PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT; + PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | + PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; - /* Enable VOUT if not enabled (it is disabled by default) */ - if (!(config & ADM1278_VOUT_EN)) { - config |= ADM1278_VOUT_EN; + /* Enable VOUT & TEMP1 if not enabled (disabled by default) */ + if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) != + (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) { + config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN; ret = i2c_smbus_write_byte_data(client, ADM1275_PMON_CONFIG, config); @@ -625,10 +627,6 @@ static int adm1275_probe(struct i2c_client *client) return -ENODEV; } } - - if (config & ADM1278_TEMP1_EN) - info->func[0] |= - PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; if (config & ADM1278_VIN_EN) info->func[0] |= PMBUS_HAVE_VIN; break; From 580e9b987b89ad834cc6b91087f620dfa12f705c Mon Sep 17 00:00:00 2001 From: Potin Lai Date: Wed, 2 Mar 2022 20:38:16 +0800 Subject: [PATCH 149/509] hwmon: (adm1275) Allow setting sample averaging [ Upstream commit a3cd66d7cbadcc0c29884f25b754fd22699c719c ] Current driver assume PWR_AVG and VI_AVG as 1 by default, and user needs to set sample averaging via sysfs manually. This patch parses the properties "adi,power-sample-average" and "adi,volt-curr-sample-average" from device tree, and setting sample averaging during probe. Input value must be one of value in the list [1, 2, 4, 8, 16, 32, 64, 128]. Signed-off-by: Potin Lai Link: https://lore.kernel.org/r/20220302123817.27025-2-potin.lai@quantatw.com Signed-off-by: Guenter Roeck Stable-dep-of: b153a0bb4199 ("hwmon: (pmbus/adm1275) Fix problems with temperature monitoring on ADM1272") Signed-off-by: Sasha Levin --- drivers/hwmon/pmbus/adm1275.c | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c index 0be1b5777d2f..92eb047ff246 100644 --- a/drivers/hwmon/pmbus/adm1275.c +++ b/drivers/hwmon/pmbus/adm1275.c @@ -475,6 +475,7 @@ static int adm1275_probe(struct i2c_client *client) int vindex = -1, voindex = -1, cindex = -1, pindex = -1; int tindex = -1; u32 shunt; + u32 avg; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_READ_BYTE_DATA @@ -687,7 +688,7 @@ static int adm1275_probe(struct i2c_client *client) if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) != (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) { config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN; - ret = i2c_smbus_write_byte_data(client, + ret = i2c_smbus_write_word_data(client, ADM1275_PMON_CONFIG, config); if (ret < 0) { @@ -756,6 +757,43 @@ static int adm1275_probe(struct i2c_client *client) return -ENODEV; } + if (data->have_power_sampling && + of_property_read_u32(client->dev.of_node, + "adi,power-sample-average", &avg) == 0) { + if (!avg || avg > ADM1275_SAMPLES_AVG_MAX || + BIT(__fls(avg)) != avg) { + dev_err(&client->dev, + "Invalid number of power samples"); + return -EINVAL; + } + ret = adm1275_write_pmon_config(data, client, true, + ilog2(avg)); + if (ret < 0) { + dev_err(&client->dev, + "Setting power sample averaging failed with error %d", + ret); + return ret; + } + } + + if (of_property_read_u32(client->dev.of_node, + "adi,volt-curr-sample-average", &avg) == 0) { + if (!avg || avg > ADM1275_SAMPLES_AVG_MAX || + BIT(__fls(avg)) != avg) { + dev_err(&client->dev, + "Invalid number of voltage/current samples"); + return -EINVAL; + } + ret = adm1275_write_pmon_config(data, client, false, + ilog2(avg)); + if (ret < 0) { + dev_err(&client->dev, + "Setting voltage and current sample averaging failed with error %d", + ret); + return ret; + } + } + if (voindex < 0) voindex = vindex; if (vindex >= 0) { From 7e2edb84fe7c2c01543eb8a4976d55f2871ae3c7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 2 Jun 2023 14:34:47 -0700 Subject: [PATCH 150/509] hwmon: (pmbus/adm1275) Fix problems with temperature monitoring on ADM1272 [ Upstream commit b153a0bb4199566abd337119207f82b59a8cd1ca ] The PMON_CONFIG register on ADM1272 is a 16 bit register. Writing a 8 bit value into it clears the upper 8 bits of the register, resulting in unexpected side effects. Fix by writing the 16 bit register value. Also, it has been reported that temperature readings are sometimes widely inaccurate, to the point where readings may result in device shutdown due to errant overtemperature faults. Improve by enabling temperature sampling. While at it, move the common code for ADM1272 and ADM1278 into a separate function, and clarify in the error message that an attempt was made to enable both VOUT and temperature monitoring. Last but not least, return the error code reported by the underlying I2C controller and not -ENODEV if updating the PMON_CONFIG register fails. After all, this does not indicate that the chip is not present, but an error in the communication with the chip. Fixes: 4ff0ce227a1e ("hwmon: (pmbus/adm1275) Add support for ADM1272") Fixes: 9da9c2dc57b2 ("hwmon: (adm1275) enable adm1272 temperature reporting") Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20230602213447.3557346-1-linux@roeck-us.net Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/pmbus/adm1275.c | 52 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c index 92eb047ff246..c0618205758e 100644 --- a/drivers/hwmon/pmbus/adm1275.c +++ b/drivers/hwmon/pmbus/adm1275.c @@ -37,10 +37,13 @@ enum chips { adm1075, adm1272, adm1275, adm1276, adm1278, adm1293, adm1294 }; #define ADM1272_IRANGE BIT(0) +#define ADM1278_TSFILT BIT(15) #define ADM1278_TEMP1_EN BIT(3) #define ADM1278_VIN_EN BIT(2) #define ADM1278_VOUT_EN BIT(1) +#define ADM1278_PMON_DEFCONFIG (ADM1278_VOUT_EN | ADM1278_TEMP1_EN | ADM1278_TSFILT) + #define ADM1293_IRANGE_25 0 #define ADM1293_IRANGE_50 BIT(6) #define ADM1293_IRANGE_100 BIT(7) @@ -462,6 +465,22 @@ static const struct i2c_device_id adm1275_id[] = { }; MODULE_DEVICE_TABLE(i2c, adm1275_id); +/* Enable VOUT & TEMP1 if not enabled (disabled by default) */ +static int adm1275_enable_vout_temp(struct i2c_client *client, int config) +{ + int ret; + + if ((config & ADM1278_PMON_DEFCONFIG) != ADM1278_PMON_DEFCONFIG) { + config |= ADM1278_PMON_DEFCONFIG; + ret = i2c_smbus_write_word_data(client, ADM1275_PMON_CONFIG, config); + if (ret < 0) { + dev_err(&client->dev, "Failed to enable VOUT/TEMP1 monitoring\n"); + return ret; + } + } + return 0; +} + static int adm1275_probe(struct i2c_client *client) { s32 (*config_read_fn)(const struct i2c_client *client, u8 reg); @@ -615,19 +634,10 @@ static int adm1275_probe(struct i2c_client *client) PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; - /* Enable VOUT & TEMP1 if not enabled (disabled by default) */ - if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) != - (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) { - config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN; - ret = i2c_smbus_write_byte_data(client, - ADM1275_PMON_CONFIG, - config); - if (ret < 0) { - dev_err(&client->dev, - "Failed to enable VOUT monitoring\n"); - return -ENODEV; - } - } + ret = adm1275_enable_vout_temp(client, config); + if (ret) + return ret; + if (config & ADM1278_VIN_EN) info->func[0] |= PMBUS_HAVE_VIN; break; @@ -684,19 +694,9 @@ static int adm1275_probe(struct i2c_client *client) PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP; - /* Enable VOUT & TEMP1 if not enabled (disabled by default) */ - if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) != - (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) { - config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN; - ret = i2c_smbus_write_word_data(client, - ADM1275_PMON_CONFIG, - config); - if (ret < 0) { - dev_err(&client->dev, - "Failed to enable VOUT monitoring\n"); - return -ENODEV; - } - } + ret = adm1275_enable_vout_temp(client, config); + if (ret) + return ret; if (config & ADM1278_VIN_EN) info->func[0] |= PMBUS_HAVE_VIN; From 245aa7c0233ed05fb61f6ef599254b069625eb3c Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Thu, 8 Jun 2023 17:36:29 +0200 Subject: [PATCH 151/509] ARM: dts: BCM5301X: fix duplex-full => full-duplex [ Upstream commit fd274b733bfdde3ca72f0fa2a37f032f3a8c402c ] this typo was found by the dtbs_check | ports:port@5:fixed-link: 'oneOf' conditional failed, | {'speed': [[1000]], 'duplex-full': True} is not of type 'array' | 'duplex-full' does not match any of the regexes: 'pinctrl-[0-]..." this should have been full-duplex; Fixes: 935327a73553 ("ARM: dts: BCM5301X: Add DT for Meraki MR26") Fixes: ec88a9c344d9 ("ARM: BCM5301X: Add DT for Meraki MR32") Signed-off-by: Christian Lamparter Link: https://lore.kernel.org/r/50522f45566951a9eabd22820647924cc6b4a264.1686238550.git.chunkeey@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm53015-meraki-mr26.dts | 2 +- arch/arm/boot/dts/bcm53016-meraki-mr32.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm53015-meraki-mr26.dts b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts index 14f58033efeb..ca2266b936ee 100644 --- a/arch/arm/boot/dts/bcm53015-meraki-mr26.dts +++ b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts @@ -128,7 +128,7 @@ port@5 { fixed-link { speed = <1000>; - duplex-full; + full-duplex; }; }; }; diff --git a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts index 577a4dc604d9..edf9910100b0 100644 --- a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts +++ b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts @@ -212,7 +212,7 @@ port@5 { fixed-link { speed = <1000>; - duplex-full; + full-duplex; }; }; }; From a77b80825bf1124c10e0e0deef3fc7e5abf023e0 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Thu, 11 May 2023 04:23:14 -0700 Subject: [PATCH 152/509] drm/amdkfd: Fix potential deallocation of previously deallocated memory. [ Upstream commit cabbdea1f1861098991768d7bbf5a49ed1608213 ] Pointer mqd_mem_obj can be deallocated in kfd_gtt_sa_allocate(). The function then returns non-zero value, which causes the second deallocation. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: d1f8f0d17d40 ("drm/amdkfd: Move non-sdma mqd allocation out of init_mqd") Signed-off-by: Daniil Dulov Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 3b6f5963180d..dadeb2013fd9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -113,18 +113,19 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, &(mqd_mem_obj->gtt_mem), &(mqd_mem_obj->gpu_addr), (void *)&(mqd_mem_obj->cpu_ptr), true); + + if (retval) { + kfree(mqd_mem_obj); + return NULL; + } } else { retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), &mqd_mem_obj); - } - - if (retval) { - kfree(mqd_mem_obj); - return NULL; + if (retval) + return NULL; } return mqd_mem_obj; - } static void init_mqd(struct mqd_manager *mm, void **mqd, From e070120e6d683cf806b044057d2ecc8bd750c916 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 19 May 2023 08:33:27 -0700 Subject: [PATCH 153/509] drm/radeon: fix possible division-by-zero errors [ Upstream commit 1becc57cd1a905e2aa0e1eca60d2a37744525c4a ] Function rv740_get_decoded_reference_divider() may return 0 due to unpredictable reference divider value calculated in radeon_atom_get_clock_dividers(). This will lead to division-by-zero error once that value is used as a divider in calculating 'clk_s'. While unlikely, this issue should nonetheless be prevented so add a sanity check for such cases by testing 'decoded_ref' value against 0. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. v2: minor coding style fixes (Alex) In practice this should actually happen as the vbios should be properly populated. Fixes: 66229b200598 ("drm/radeon/kms: add dpm support for rv7xx (v4)") Signed-off-by: Nikita Zhandarovich Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/radeon/cypress_dpm.c | 8 ++++++-- drivers/gpu/drm/radeon/ni_dpm.c | 8 ++++++-- drivers/gpu/drm/radeon/rv740_dpm.c | 8 ++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c index 35b177d77791..7120710d188f 100644 --- a/drivers/gpu/drm/radeon/cypress_dpm.c +++ b/drivers/gpu/drm/radeon/cypress_dpm.c @@ -559,8 +559,12 @@ static int cypress_populate_mclk_value(struct radeon_device *rdev, ASIC_INTERNAL_MEMORY_SS, vco_freq)) { u32 reference_clock = rdev->clock.mpll.reference_freq; u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div); - u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate); - u32 clk_v = ss.percentage * + u32 clk_s, clk_v; + + if (!decoded_ref) + return -EINVAL; + clk_s = reference_clock * 5 / (decoded_ref * ss.rate); + clk_v = ss.percentage * (0x4000 * dividers.whole_fb_div + 0x800 * dividers.frac_fb_div) / (clk_s * 625); mpll_ss1 &= ~CLKV_MASK; diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index a5218747742b..f79b348d36ad 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -2240,8 +2240,12 @@ static int ni_populate_mclk_value(struct radeon_device *rdev, ASIC_INTERNAL_MEMORY_SS, vco_freq)) { u32 reference_clock = rdev->clock.mpll.reference_freq; u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div); - u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate); - u32 clk_v = ss.percentage * + u32 clk_s, clk_v; + + if (!decoded_ref) + return -EINVAL; + clk_s = reference_clock * 5 / (decoded_ref * ss.rate); + clk_v = ss.percentage * (0x4000 * dividers.whole_fb_div + 0x800 * dividers.frac_fb_div) / (clk_s * 625); mpll_ss1 &= ~CLKV_MASK; diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c index 327d65a76e1f..79b2de65e905 100644 --- a/drivers/gpu/drm/radeon/rv740_dpm.c +++ b/drivers/gpu/drm/radeon/rv740_dpm.c @@ -250,8 +250,12 @@ int rv740_populate_mclk_value(struct radeon_device *rdev, ASIC_INTERNAL_MEMORY_SS, vco_freq)) { u32 reference_clock = rdev->clock.mpll.reference_freq; u32 decoded_ref = rv740_get_decoded_reference_divider(dividers.ref_div); - u32 clk_s = reference_clock * 5 / (decoded_ref * ss.rate); - u32 clk_v = 0x40000 * ss.percentage * + u32 clk_s, clk_v; + + if (!decoded_ref) + return -EINVAL; + clk_s = reference_clock * 5 / (decoded_ref * ss.rate); + clk_v = 0x40000 * ss.percentage * (dividers.whole_fb_div + (dividers.frac_fb_div / 8)) / (clk_s * 10000); mpll_ss1 &= ~CLKV_MASK; From 968e27fd037ec4732068820a9b9836eccc0e0a12 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 1 Jun 2023 15:44:12 -0700 Subject: [PATCH 154/509] amdgpu: validate offset_in_bo of drm_amdgpu_gem_va MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9f0bcf49e9895cb005d78b33a5eebfa11711b425 ] This is motivated by OOB access in amdgpu_vm_update_range when offset_in_bo+map_size overflows. v2: keep the validations in amdgpu_vm_bo_map v3: add the validations to amdgpu_vm_bo_map/amdgpu_vm_bo_replace_map rather than to amdgpu_gem_va_ioctl Fixes: 9f7eb5367d00 ("drm/amdgpu: actually use the VM map parameters") Reviewed-by: Christian Kƶnig Signed-off-by: Chia-I Wu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c705ce11c436..8445bb7ae06a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2229,14 +2229,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t eaddr; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; @@ -2295,14 +2295,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, int r; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; From 9341501e2f7af29f5b5562c2840a7fde40eb7de4 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 9 Jun 2023 04:01:38 -0700 Subject: [PATCH 155/509] RDMA/bnxt_re: wraparound mbox producer index [ Upstream commit 0af91306e17ef3d18e5f100aa58aa787869118af ] Driver is not handling the wraparound of the mbox producer index correctly. Currently the wraparound happens once u32 max is reached. Bit 31 of the producer index register is special and should be set only once for the first command. Because the producer index overflow setting bit31 after a long time, FW goes to initialization sequence and this causes FW hang. Fix is to wraparound the mbox producer index once it reaches u16 max. Fixes: cee0c7bba486 ("RDMA/bnxt_re: Refactor command queue management code") Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1686308514-11996-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 4836bc433f53..212e5cd82d0d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -181,7 +181,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, } while (size > 0); cmdq->seq_num++; - cmdq_prod = hwq->prod; + cmdq_prod = hwq->prod & 0xFFFF; if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) { /* The very first doorbell write * is required to set this flag @@ -595,7 +595,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth); - hwq_attr.depth = rcfw->cmdq_depth; + hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF; hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS; hwq_attr.type = HWQ_TYPE_CTX; if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) { From e749bc5a90548b0e0eca5ca19d454fc7bd03b4f7 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 9 Jun 2023 04:01:39 -0700 Subject: [PATCH 156/509] RDMA/bnxt_re: Avoid calling wake_up threads from spin_lock context [ Upstream commit 3099bcdc19b701f732f638ee45679858c08559bb ] bnxt_qplib_service_creq can be called from interrupt or tasklet or process context. So the function take irq variant of spin_lock. But when wake_up is invoked with the lock held, it is putting the calling context to sleep. [exception RIP: __wake_up_common+190] RIP: ffffffffb7539d7e RSP: ffffa73300207ad8 RFLAGS: 00000083 RAX: 0000000000000001 RBX: ffff91fa295f69b8 RCX: dead000000000200 RDX: ffffa733344af940 RSI: ffffa73336527940 RDI: ffffa73336527940 RBP: 000000000000001c R8: 0000000000000002 R9: 00000000000299c0 R10: 0000017230de82c5 R11: 0000000000000002 R12: ffffa73300207b28 R13: 0000000000000000 R14: ffffa733341bf928 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 Call the wakeup after releasing the lock. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kashyap Desai Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1686308514-11996-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 212e5cd82d0d..2b0c3a86293c 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -295,7 +295,8 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw, } static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, - struct creq_qp_event *qp_event) + struct creq_qp_event *qp_event, + u32 *num_wait) { struct creq_qp_error_notification *err_event; struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq; @@ -304,6 +305,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, u16 cbit, blocked = 0; struct pci_dev *pdev; unsigned long flags; + u32 wait_cmds = 0; __le16 mcookie; u16 cookie; int rc = 0; @@ -363,9 +365,10 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, crsqe->req_size = 0; if (!blocked) - wake_up(&rcfw->cmdq.waitq); + wait_cmds++; spin_unlock_irqrestore(&hwq->lock, flags); } + *num_wait += wait_cmds; return rc; } @@ -379,6 +382,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) struct creq_base *creqe; u32 sw_cons, raw_cons; unsigned long flags; + u32 num_wakeup = 0; /* Service the CREQ until budget is over */ spin_lock_irqsave(&hwq->lock, flags); @@ -397,7 +401,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) switch (type) { case CREQ_BASE_TYPE_QP_EVENT: bnxt_qplib_process_qp_event - (rcfw, (struct creq_qp_event *)creqe); + (rcfw, (struct creq_qp_event *)creqe, + &num_wakeup); creq->stats.creq_qp_event_processed++; break; case CREQ_BASE_TYPE_FUNC_EVENT: @@ -425,6 +430,8 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t) rcfw->res->cctx, true); } spin_unlock_irqrestore(&hwq->lock, flags); + if (num_wakeup) + wake_up_nr(&rcfw->cmdq.waitq, num_wakeup); } static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance) From 294321349bd3b0680847fc2bbe66b9ab3e522fea Mon Sep 17 00:00:00 2001 From: Hao Luo Date: Tue, 11 Apr 2023 09:51:07 +0800 Subject: [PATCH 157/509] clk: imx: clk-imx8mn: fix memory leak in imx8mn_clocks_probe [ Upstream commit 188d070de9132667956f5aadd98d2bd87d3eac89 ] Use devm_of_iomap() instead of of_iomap() to automatically handle the unused ioremap region. If any error occurs, regions allocated by kzalloc() will leak, but using devm_kzalloc() instead will automatically free the memory using devm_kfree(). Fixes: daeb14545514 ("clk: imx: imx8mn: Switch to clk_hw based API") Fixes: 96d6392b54db ("clk: imx: Add support for i.MX8MN clock driver") Signed-off-by: Hao Luo Reviewed-by: Dongliang Mu Reviewed-by: Peng Fan Link: https://lore.kernel.org/r/20230411015107.2645-1-m202171776@hust.edu.cn Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8mn.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index 8a49e072d6e8..23f37a2cdf3a 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -291,7 +291,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) void __iomem *base; int ret; - clk_hw_data = kzalloc(struct_size(clk_hw_data, hws, + clk_hw_data = devm_kzalloc(dev, struct_size(clk_hw_data, hws, IMX8MN_CLK_END), GFP_KERNEL); if (WARN_ON(!clk_hw_data)) return -ENOMEM; @@ -308,10 +308,10 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) hws[IMX8MN_CLK_EXT4] = imx_obtain_fixed_clk_hw(np, "clk_ext4"); np = of_find_compatible_node(NULL, NULL, "fsl,imx8mn-anatop"); - base = of_iomap(np, 0); + base = devm_of_iomap(dev, np, 0, NULL); of_node_put(np); - if (WARN_ON(!base)) { - ret = -ENOMEM; + if (WARN_ON(IS_ERR(base))) { + ret = PTR_ERR(base); goto unregister_hws; } From cb047c13bbf9018693ae31f03a5a26b212d02f13 Mon Sep 17 00:00:00 2001 From: Yuxing Liu Date: Wed, 3 May 2023 07:06:07 +0000 Subject: [PATCH 158/509] clk: imx: clk-imx8mp: improve error handling in imx8mp_clocks_probe() [ Upstream commit 878b02d5f3b56cb090dbe2c70c89273be144087f ] Replace of_iomap() and kzalloc() with devm_of_iomap() and devm_kzalloc() which can automatically release the related memory when the device or driver is removed or unloaded to avoid potential memory leak. In this case, iounmap(anatop_base) in line 427,433 are removed as manual release is not required. Besides, referring to clk-imx8mq.c, check the return code of of_clk_add_hw_provider, if it returns negtive, print error info and unregister hws, which makes the program more robust. Fixes: 9c140d992676 ("clk: imx: Add support for i.MX8MP clock driver") Signed-off-by: Yuxing Liu Reviewed-by: Dongliang Mu Reviewed-by: Abel Vesa Link: https://lore.kernel.org/r/20230503070607.2462-1-lyx2022@hust.edu.cn Signed-off-by: Abel Vesa Signed-off-by: Sasha Levin --- drivers/clk/imx/clk-imx8mp.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 72592e35836b..98a4711ef38d 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -425,25 +425,22 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; void __iomem *anatop_base, *ccm_base; + int err; np = of_find_compatible_node(NULL, NULL, "fsl,imx8mp-anatop"); - anatop_base = of_iomap(np, 0); + anatop_base = devm_of_iomap(dev, np, 0, NULL); of_node_put(np); - if (WARN_ON(!anatop_base)) - return -ENOMEM; + if (WARN_ON(IS_ERR(anatop_base))) + return PTR_ERR(anatop_base); np = dev->of_node; ccm_base = devm_platform_ioremap_resource(pdev, 0); - if (WARN_ON(IS_ERR(ccm_base))) { - iounmap(anatop_base); + if (WARN_ON(IS_ERR(ccm_base))) return PTR_ERR(ccm_base); - } - clk_hw_data = kzalloc(struct_size(clk_hw_data, hws, IMX8MP_CLK_END), GFP_KERNEL); - if (WARN_ON(!clk_hw_data)) { - iounmap(anatop_base); + clk_hw_data = devm_kzalloc(dev, struct_size(clk_hw_data, hws, IMX8MP_CLK_END), GFP_KERNEL); + if (WARN_ON(!clk_hw_data)) return -ENOMEM; - } clk_hw_data->num = IMX8MP_CLK_END; hws = clk_hw_data->hws; @@ -743,7 +740,12 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) imx_check_clk_hws(hws, IMX8MP_CLK_END); - of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_hw_data); + err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_hw_data); + if (err < 0) { + dev_err(dev, "failed to register hws for i.MX8MP\n"); + imx_unregister_hw_clocks(hws, IMX8MP_CLK_END); + return err; + } imx_register_uart_clocks(4); From 404e9f741acfb188212f7142d91e247630dd77cc Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Fri, 9 Dec 2022 09:41:24 +0000 Subject: [PATCH 159/509] clk: tegra: tegra124-emc: Fix potential memory leak [ Upstream commit 53a06e5924c0d43c11379a08c5a78529c3e61595 ] The tegra and tegra needs to be freed in the error handling path, otherwise it will be leaked. Fixes: 2db04f16b589 ("clk: tegra: Add EMC clock driver") Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20221209094124.71043-1-yuancan@huawei.com Acked-by: Thierry Reding Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/tegra/clk-tegra124-emc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra124-emc.c b/drivers/clk/tegra/clk-tegra124-emc.c index 733a962ff521..15f728edc54b 100644 --- a/drivers/clk/tegra/clk-tegra124-emc.c +++ b/drivers/clk/tegra/clk-tegra124-emc.c @@ -455,6 +455,7 @@ static int load_timings_from_dt(struct tegra_clk_emc *tegra, err = load_one_timing_from_dt(tegra, timing, child); if (err) { of_node_put(child); + kfree(tegra->timings); return err; } @@ -506,6 +507,7 @@ struct clk *tegra_clk_register_emc(void __iomem *base, struct device_node *np, err = load_timings_from_dt(tegra, node, node_ram_code); if (err) { of_node_put(node); + kfree(tegra); return ERR_PTR(err); } } From f923a582217b198b557756809ffe42ac0fad6adb Mon Sep 17 00:00:00 2001 From: Su Hui Date: Thu, 15 Jun 2023 10:17:32 +0800 Subject: [PATCH 160/509] ALSA: ac97: Fix possible NULL dereference in snd_ac97_mixer [ Upstream commit 79597c8bf64ca99eab385115743131d260339da5 ] smatch error: sound/pci/ac97/ac97_codec.c:2354 snd_ac97_mixer() error: we previously assumed 'rac97' could be null (see line 2072) remove redundant assignment, return error if rac97 is NULL. Fixes: da3cec35dd3c ("ALSA: Kill snd_assert() in sound/pci/*") Signed-off-by: Su Hui Link: https://lore.kernel.org/r/20230615021732.1972194-1-suhui@nfschina.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/ac97/ac97_codec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c index cd66632bf1c3..e18572eae5e0 100644 --- a/sound/pci/ac97/ac97_codec.c +++ b/sound/pci/ac97/ac97_codec.c @@ -2007,8 +2007,8 @@ int snd_ac97_mixer(struct snd_ac97_bus *bus, struct snd_ac97_template *template, .dev_disconnect = snd_ac97_dev_disconnect, }; - if (rac97) - *rac97 = NULL; + if (!rac97) + return -EINVAL; if (snd_BUG_ON(!bus || !template)) return -EINVAL; if (snd_BUG_ON(template->num >= 4)) From c3b63584d8c2aae2ff1d4f7cd0aa860568bfefcf Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 12 Jun 2023 21:25:33 +0300 Subject: [PATCH 161/509] drm/msm/dpu: do not enable color-management if DSPPs are not available [ Upstream commit 3bcfc7b90465efd337d39b91b43972162f0d1908 ] We can not support color management without DSPP blocks being provided in the HW catalog. Do not enable color management for CRTCs if num_dspps is 0. Fixes: 4259ff7ae509 ("drm/msm/dpu: add support for pcc color block in dpu driver") Reported-by: Yongqin Liu Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Reviewed-by: Marijn Suijten Reviewed-by: Sumit Semwal Tested-by: Yongqin Liu Patchwork: https://patchwork.freedesktop.org/patch/542141/ Link: https://lore.kernel.org/r/20230612182534.3345805-1-dmitry.baryshkov@linaro.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 5afb3c544653..4c64e2d4f650 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1262,6 +1262,8 @@ static const struct drm_crtc_helper_funcs dpu_crtc_helper_funcs = { struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, struct drm_plane *cursor) { + struct msm_drm_private *priv = dev->dev_private; + struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms); struct drm_crtc *crtc = NULL; struct dpu_crtc *dpu_crtc = NULL; int i; @@ -1293,7 +1295,8 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane, drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs); - drm_crtc_enable_color_mgmt(crtc, 0, true, 0); + if (dpu_kms->catalog->dspp_count) + drm_crtc_enable_color_mgmt(crtc, 0, true, 0); /* save user friendly CRTC name for later */ snprintf(dpu_crtc->name, DPU_CRTC_NAME_SIZE, "crtc%u", crtc->base.id); From c67a55f7cc8d767d624235bf1bcd0947e56abe0f Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 12 Jun 2023 15:02:59 -0700 Subject: [PATCH 162/509] drm/msm/dp: Free resources after unregistering them [ Upstream commit fa0048a4b1fa7a50c8b0e514f5b428abdf69a6f8 ] The DP component's unbind operation walks through the submodules to unregister and clean things up. But if the unbind happens because the DP controller itself is being removed, all the memory for those submodules has just been freed. Change the order of these operations to avoid the many use-after-free that otherwise happens in this code path. Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/542166/ Link: https://lore.kernel.org/r/20230612220259.1884381-1-quic_bjorande@quicinc.com Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dp/dp_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 0bcccf422192..4da8cea76a11 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -1267,9 +1267,9 @@ static int dp_display_remove(struct platform_device *pdev) dp = container_of(g_dp_display, struct dp_display_private, dp_display); + component_del(&pdev->dev, &dp_display_comp_ops); dp_display_deinit_sub_modules(dp); - component_del(&pdev->dev, &dp_display_comp_ops); platform_set_drvdata(pdev, NULL); return 0; From 866d4340c6c994768ecfa5da5c96e87996304ea6 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:06 +0300 Subject: [PATCH 163/509] clk: vc5: check memory returned by kasprintf() [ Upstream commit 144601f6228de5598f03e693822b60a95c367a17 ] kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: f491276a5168 ("clk: vc5: Allow Versaclock driver to support multiple instances") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-2-claudiu.beznea@microchip.com Reviewed-by: Luca Ceresoli Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-versaclock5.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/clk/clk-versaclock5.c b/drivers/clk/clk-versaclock5.c index eb597ea7bb87..3ddb974da039 100644 --- a/drivers/clk/clk-versaclock5.c +++ b/drivers/clk/clk-versaclock5.c @@ -906,6 +906,11 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) } init.name = kasprintf(GFP_KERNEL, "%pOFn.mux", client->dev.of_node); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } + init.ops = &vc5_mux_ops; init.flags = 0; init.parent_names = parent_names; @@ -920,6 +925,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.dbl", client->dev.of_node); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_dbl_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; @@ -935,6 +944,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) /* Register PFD */ memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.pfd", client->dev.of_node); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_pfd_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; @@ -952,6 +965,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) /* Register PLL */ memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.pll", client->dev.of_node); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_pll_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; @@ -971,6 +988,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.fod%d", client->dev.of_node, idx); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_fod_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; @@ -989,6 +1010,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.out0_sel_i2cb", client->dev.of_node); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_clk_out_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; @@ -1015,6 +1040,10 @@ static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id) memset(&init, 0, sizeof(init)); init.name = kasprintf(GFP_KERNEL, "%pOFn.out%d", client->dev.of_node, idx + 1); + if (!init.name) { + ret = -ENOMEM; + goto err_clk; + } init.ops = &vc5_clk_out_ops; init.flags = CLK_SET_RATE_PARENT; init.parent_names = parent_names; From f64fcd3acf1f93f7c21531d90bb029bd72300a3b Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:07 +0300 Subject: [PATCH 164/509] clk: cdce925: check return value of kasprintf() [ Upstream commit bb7d09ddbf361d51eae46f38e7c8a2b85914ea2a ] kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: 19fbbbbcd3a3 ("Add TI CDCE925 I2C controlled clock synthesizer driver") Depends-on: e665f029a283 ("clk: Convert to using %pOFn instead of device_node.name") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-3-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-cdce925.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/clk/clk-cdce925.c b/drivers/clk/clk-cdce925.c index 308b353815e1..470d91d7314d 100644 --- a/drivers/clk/clk-cdce925.c +++ b/drivers/clk/clk-cdce925.c @@ -705,6 +705,10 @@ static int cdce925_probe(struct i2c_client *client, for (i = 0; i < data->chip_info->num_plls; ++i) { pll_clk_name[i] = kasprintf(GFP_KERNEL, "%pOFn.pll%d", client->dev.of_node, i); + if (!pll_clk_name[i]) { + err = -ENOMEM; + goto error; + } init.name = pll_clk_name[i]; data->pll[i].chip = data; data->pll[i].hw.init = &init; @@ -746,6 +750,10 @@ static int cdce925_probe(struct i2c_client *client, init.num_parents = 1; init.parent_names = &parent_name; /* Mux Y1 to input */ init.name = kasprintf(GFP_KERNEL, "%pOFn.Y1", client->dev.of_node); + if (!init.name) { + err = -ENOMEM; + goto error; + } data->clk[0].chip = data; data->clk[0].hw.init = &init; data->clk[0].index = 0; @@ -764,6 +772,10 @@ static int cdce925_probe(struct i2c_client *client, for (i = 1; i < data->chip_info->num_outputs; ++i) { init.name = kasprintf(GFP_KERNEL, "%pOFn.Y%d", client->dev.of_node, i+1); + if (!init.name) { + err = -ENOMEM; + goto error; + } data->clk[i].chip = data; data->clk[i].hw.init = &init; data->clk[i].index = i; From fc813d05739ec5ae8f67f73488dbfb9de827acb9 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 25 Mar 2021 13:26:40 -0600 Subject: [PATCH 165/509] clk: si5341: Allow different output VDD_SEL values [ Upstream commit b7bbf6ec4940d1a69811ec354edeeb9751fa8e85 ] The driver was not previously programming the VDD_SEL values for each output to indicate what external VDDO voltage was used for each. Add ability to specify a regulator supplying the VDDO pin for each output of the device. The voltage of the regulator is used to automatically set the VDD_SEL value appropriately. If no regulator is specified and the chip is being reconfigured, assume 2.5V which appears to be the chip default. Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20210325192643.2190069-7-robert.hancock@calian.com Signed-off-by: Stephen Boyd Stable-dep-of: 2560114c06d7 ("clk: si5341: return error if one synth clock registration fails") Signed-off-by: Sasha Levin --- drivers/clk/clk-si5341.c | 136 +++++++++++++++++++++++++++++++-------- 1 file changed, 110 insertions(+), 26 deletions(-) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 382a0619a048..64d962c54bba 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -59,6 +60,7 @@ struct clk_si5341_synth { struct clk_si5341_output { struct clk_hw hw; struct clk_si5341 *data; + struct regulator *vddo_reg; u8 index; }; #define to_clk_si5341_output(_hw) \ @@ -84,6 +86,7 @@ struct clk_si5341 { struct clk_si5341_output_config { u8 out_format_drv_bits; u8 out_cm_ampl_bits; + u8 vdd_sel_bits; bool synth_master; bool always_on; }; @@ -136,6 +139,8 @@ struct clk_si5341_output_config { #define SI5341_OUT_R_REG(output) \ ((output)->data->reg_rdiv_offset[(output)->index]) +#define SI5341_OUT_MUX_VDD_SEL_MASK 0x38 + /* Synthesize N divider */ #define SI5341_SYNTH_N_NUM(x) (0x0302 + ((x) * 11)) #define SI5341_SYNTH_N_DEN(x) (0x0308 + ((x) * 11)) @@ -1250,11 +1255,11 @@ static const struct regmap_config si5341_regmap_config = { .volatile_table = &si5341_regmap_volatile, }; -static int si5341_dt_parse_dt(struct i2c_client *client, - struct clk_si5341_output_config *config) +static int si5341_dt_parse_dt(struct clk_si5341 *data, + struct clk_si5341_output_config *config) { struct device_node *child; - struct device_node *np = client->dev.of_node; + struct device_node *np = data->i2c_client->dev.of_node; u32 num; u32 val; @@ -1263,13 +1268,13 @@ static int si5341_dt_parse_dt(struct i2c_client *client, for_each_child_of_node(np, child) { if (of_property_read_u32(child, "reg", &num)) { - dev_err(&client->dev, "missing reg property of %s\n", + dev_err(&data->i2c_client->dev, "missing reg property of %s\n", child->name); goto put_child; } if (num >= SI5341_MAX_NUM_OUTPUTS) { - dev_err(&client->dev, "invalid clkout %d\n", num); + dev_err(&data->i2c_client->dev, "invalid clkout %d\n", num); goto put_child; } @@ -1288,7 +1293,7 @@ static int si5341_dt_parse_dt(struct i2c_client *client, config[num].out_format_drv_bits |= 0xc0; break; default: - dev_err(&client->dev, + dev_err(&data->i2c_client->dev, "invalid silabs,format %u for %u\n", val, num); goto put_child; @@ -1301,7 +1306,7 @@ static int si5341_dt_parse_dt(struct i2c_client *client, if (!of_property_read_u32(child, "silabs,common-mode", &val)) { if (val > 0xf) { - dev_err(&client->dev, + dev_err(&data->i2c_client->dev, "invalid silabs,common-mode %u\n", val); goto put_child; @@ -1312,7 +1317,7 @@ static int si5341_dt_parse_dt(struct i2c_client *client, if (!of_property_read_u32(child, "silabs,amplitude", &val)) { if (val > 0xf) { - dev_err(&client->dev, + dev_err(&data->i2c_client->dev, "invalid silabs,amplitude %u\n", val); goto put_child; @@ -1329,6 +1334,34 @@ static int si5341_dt_parse_dt(struct i2c_client *client, config[num].always_on = of_property_read_bool(child, "always-on"); + + config[num].vdd_sel_bits = 0x08; + if (data->clk[num].vddo_reg) { + int vdd = regulator_get_voltage(data->clk[num].vddo_reg); + + switch (vdd) { + case 3300000: + config[num].vdd_sel_bits |= 0 << 4; + break; + case 1800000: + config[num].vdd_sel_bits |= 1 << 4; + break; + case 2500000: + config[num].vdd_sel_bits |= 2 << 4; + break; + default: + dev_err(&data->i2c_client->dev, + "unsupported vddo voltage %d for %s\n", + vdd, child->name); + goto put_child; + } + } else { + /* chip seems to default to 2.5V when not set */ + dev_warn(&data->i2c_client->dev, + "no regulator set, defaulting vdd_sel to 2.5V for %s\n", + child->name); + config[num].vdd_sel_bits |= 2 << 4; + } } return 0; @@ -1454,9 +1487,33 @@ static int si5341_probe(struct i2c_client *client, } } - err = si5341_dt_parse_dt(client, config); + for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { + char reg_name[10]; + + snprintf(reg_name, sizeof(reg_name), "vddo%d", i); + data->clk[i].vddo_reg = devm_regulator_get_optional( + &client->dev, reg_name); + if (IS_ERR(data->clk[i].vddo_reg)) { + err = PTR_ERR(data->clk[i].vddo_reg); + data->clk[i].vddo_reg = NULL; + if (err == -ENODEV) + continue; + goto cleanup; + } else { + err = regulator_enable(data->clk[i].vddo_reg); + if (err) { + dev_err(&client->dev, + "failed to enable %s regulator: %d\n", + reg_name, err); + data->clk[i].vddo_reg = NULL; + goto cleanup; + } + } + } + + err = si5341_dt_parse_dt(data, config); if (err) - return err; + goto cleanup; if (of_property_read_string(client->dev.of_node, "clock-output-names", &init.name)) @@ -1464,21 +1521,23 @@ static int si5341_probe(struct i2c_client *client, root_clock_name = init.name; data->regmap = devm_regmap_init_i2c(client, &si5341_regmap_config); - if (IS_ERR(data->regmap)) - return PTR_ERR(data->regmap); + if (IS_ERR(data->regmap)) { + err = PTR_ERR(data->regmap); + goto cleanup; + } i2c_set_clientdata(client, data); err = si5341_probe_chip_id(data); if (err < 0) - return err; + goto cleanup; if (of_property_read_bool(client->dev.of_node, "silabs,reprogram")) { initialization_required = true; } else { err = si5341_is_programmed_already(data); if (err < 0) - return err; + goto cleanup; initialization_required = !err; } @@ -1487,11 +1546,11 @@ static int si5341_probe(struct i2c_client *client, /* Populate the regmap cache in preparation for "cache only" */ err = si5341_read_settings(data); if (err < 0) - return err; + goto cleanup; err = si5341_send_preamble(data); if (err < 0) - return err; + goto cleanup; /* * We intend to send all 'final' register values in a single @@ -1504,19 +1563,19 @@ static int si5341_probe(struct i2c_client *client, err = si5341_write_multiple(data, si5341_reg_defaults, ARRAY_SIZE(si5341_reg_defaults)); if (err < 0) - return err; + goto cleanup; } /* Input must be up and running at this point */ err = si5341_clk_select_active_input(data); if (err < 0) - return err; + goto cleanup; if (initialization_required) { /* PLL configuration is required */ err = si5341_initialize_pll(data); if (err < 0) - return err; + goto cleanup; } /* Register the PLL */ @@ -1529,7 +1588,7 @@ static int si5341_probe(struct i2c_client *client, err = devm_clk_hw_register(&client->dev, &data->hw); if (err) { dev_err(&client->dev, "clock registration failed\n"); - return err; + goto cleanup; } init.num_parents = 1; @@ -1566,13 +1625,17 @@ static int si5341_probe(struct i2c_client *client, regmap_write(data->regmap, SI5341_OUT_CM(&data->clk[i]), config[i].out_cm_ampl_bits); + regmap_update_bits(data->regmap, + SI5341_OUT_MUX_SEL(&data->clk[i]), + SI5341_OUT_MUX_VDD_SEL_MASK, + config[i].vdd_sel_bits); } err = devm_clk_hw_register(&client->dev, &data->clk[i].hw); kfree(init.name); /* clock framework made a copy of the name */ if (err) { dev_err(&client->dev, "output %u registration failed\n", i); - return err; + goto cleanup; } if (config[i].always_on) clk_prepare(data->clk[i].hw.clk); @@ -1582,7 +1645,7 @@ static int si5341_probe(struct i2c_client *client, data); if (err) { dev_err(&client->dev, "unable to add clk provider\n"); - return err; + goto cleanup; } if (initialization_required) { @@ -1590,11 +1653,11 @@ static int si5341_probe(struct i2c_client *client, regcache_cache_only(data->regmap, false); err = regcache_sync(data->regmap); if (err < 0) - return err; + goto cleanup; err = si5341_finalize_defaults(data); if (err < 0) - return err; + goto cleanup; } /* wait for device to report input clock present and PLL lock */ @@ -1603,14 +1666,14 @@ static int si5341_probe(struct i2c_client *client, 10000, 250000); if (err) { dev_err(&client->dev, "Error waiting for input clock or PLL lock\n"); - return err; + goto cleanup; } /* clear sticky alarm bits from initialization */ err = regmap_write(data->regmap, SI5341_STATUS_STICKY, 0); if (err) { dev_err(&client->dev, "unable to clear sticky status\n"); - return err; + goto cleanup; } /* Free the names, clk framework makes copies */ @@ -1618,6 +1681,26 @@ static int si5341_probe(struct i2c_client *client, devm_kfree(&client->dev, (void *)synth_clock_names[i]); return 0; + +cleanup: + for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { + if (data->clk[i].vddo_reg) + regulator_disable(data->clk[i].vddo_reg); + } + return err; +} + +static int si5341_remove(struct i2c_client *client) +{ + struct clk_si5341 *data = i2c_get_clientdata(client); + int i; + + for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { + if (data->clk[i].vddo_reg) + regulator_disable(data->clk[i].vddo_reg); + } + + return 0; } static const struct i2c_device_id si5341_id[] = { @@ -1646,6 +1729,7 @@ static struct i2c_driver si5341_driver = { .of_match_table = clk_si5341_of_match, }, .probe = si5341_probe, + .remove = si5341_remove, .id_table = si5341_id, }; module_i2c_driver(si5341_driver); From 040113980081eed26e431cb5dc5a2c183107ad61 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Thu, 25 Mar 2021 13:26:43 -0600 Subject: [PATCH 166/509] clk: si5341: Add sysfs properties to allow checking/resetting device faults [ Upstream commit 9b13ff4340dff30f361462999a6a122fcc4e473f ] Add sysfs property files to allow viewing the current and latched states of the input present and PLL lock bits, and allow resetting the latched fault state. This allows manual checks or automated userspace polling for faults occurring after initialization. Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20210325192643.2190069-10-robert.hancock@calian.com Signed-off-by: Stephen Boyd Stable-dep-of: 2560114c06d7 ("clk: si5341: return error if one synth clock registration fails") Signed-off-by: Sasha Levin --- drivers/clk/clk-si5341.c | 96 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 64d962c54bba..5175b3024f06 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1450,6 +1450,94 @@ static int si5341_clk_select_active_input(struct clk_si5341 *data) return res; } +static ssize_t input_present_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct clk_si5341 *data = dev_get_drvdata(dev); + u32 status; + int res = regmap_read(data->regmap, SI5341_STATUS, &status); + + if (res < 0) + return res; + res = !(status & SI5341_STATUS_LOSREF); + return snprintf(buf, PAGE_SIZE, "%d\n", res); +} +static DEVICE_ATTR_RO(input_present); + +static ssize_t input_present_sticky_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct clk_si5341 *data = dev_get_drvdata(dev); + u32 status; + int res = regmap_read(data->regmap, SI5341_STATUS_STICKY, &status); + + if (res < 0) + return res; + res = !(status & SI5341_STATUS_LOSREF); + return snprintf(buf, PAGE_SIZE, "%d\n", res); +} +static DEVICE_ATTR_RO(input_present_sticky); + +static ssize_t pll_locked_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct clk_si5341 *data = dev_get_drvdata(dev); + u32 status; + int res = regmap_read(data->regmap, SI5341_STATUS, &status); + + if (res < 0) + return res; + res = !(status & SI5341_STATUS_LOL); + return snprintf(buf, PAGE_SIZE, "%d\n", res); +} +static DEVICE_ATTR_RO(pll_locked); + +static ssize_t pll_locked_sticky_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct clk_si5341 *data = dev_get_drvdata(dev); + u32 status; + int res = regmap_read(data->regmap, SI5341_STATUS_STICKY, &status); + + if (res < 0) + return res; + res = !(status & SI5341_STATUS_LOL); + return snprintf(buf, PAGE_SIZE, "%d\n", res); +} +static DEVICE_ATTR_RO(pll_locked_sticky); + +static ssize_t clear_sticky_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct clk_si5341 *data = dev_get_drvdata(dev); + long val; + + if (kstrtol(buf, 10, &val)) + return -EINVAL; + if (val) { + int res = regmap_write(data->regmap, SI5341_STATUS_STICKY, 0); + + if (res < 0) + return res; + } + return count; +} +static DEVICE_ATTR_WO(clear_sticky); + +static const struct attribute *si5341_attributes[] = { + &dev_attr_input_present.attr, + &dev_attr_input_present_sticky.attr, + &dev_attr_pll_locked.attr, + &dev_attr_pll_locked_sticky.attr, + &dev_attr_clear_sticky.attr, + NULL +}; + static int si5341_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -1676,6 +1764,12 @@ static int si5341_probe(struct i2c_client *client, goto cleanup; } + err = sysfs_create_files(&client->dev.kobj, si5341_attributes); + if (err) { + dev_err(&client->dev, "unable to create sysfs files\n"); + goto cleanup; + } + /* Free the names, clk framework makes copies */ for (i = 0; i < data->num_synth; ++i) devm_kfree(&client->dev, (void *)synth_clock_names[i]); @@ -1695,6 +1789,8 @@ static int si5341_remove(struct i2c_client *client) struct clk_si5341 *data = i2c_get_clientdata(client); int i; + sysfs_remove_files(&client->dev.kobj, si5341_attributes); + for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { if (data->clk[i].vddo_reg) regulator_disable(data->clk[i].vddo_reg); From dc3eef64805531b56acdeb2fd9512b85478e4407 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:08 +0300 Subject: [PATCH 167/509] clk: si5341: return error if one synth clock registration fails [ Upstream commit 2560114c06d7a752b3f4639f28cece58fed11267 ] In case devm_clk_hw_register() fails for one of synth clocks the probe continues. Later on, when registering output clocks which have as parents all the synth clocks, in case there is registration failure for at least one synth clock the information passed to clk core for registering output clock is not right: init.num_parents is fixed but init.parents may contain an array with less parents. Fixes: 3044a860fd09 ("clk: Add Si5341/Si5340 driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-4-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-si5341.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index 5175b3024f06..baa5e2ad2266 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1545,7 +1545,7 @@ static int si5341_probe(struct i2c_client *client, struct clk_init_data init; struct clk *input; const char *root_clock_name; - const char *synth_clock_names[SI5341_NUM_SYNTH]; + const char *synth_clock_names[SI5341_NUM_SYNTH] = { NULL }; int err; unsigned int i; struct clk_si5341_output_config config[SI5341_MAX_NUM_OUTPUTS]; @@ -1693,6 +1693,7 @@ static int si5341_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "synth N%u registration failed\n", i); + goto free_clk_names; } } @@ -1770,16 +1771,17 @@ static int si5341_probe(struct i2c_client *client, goto cleanup; } +free_clk_names: /* Free the names, clk framework makes copies */ for (i = 0; i < data->num_synth; ++i) devm_kfree(&client->dev, (void *)synth_clock_names[i]); - return 0; - cleanup: - for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { - if (data->clk[i].vddo_reg) - regulator_disable(data->clk[i].vddo_reg); + if (err) { + for (i = 0; i < SI5341_MAX_NUM_OUTPUTS; ++i) { + if (data->clk[i].vddo_reg) + regulator_disable(data->clk[i].vddo_reg); + } } return err; } From dc8d0178d50649861d9681e7c736d8ccb4725a1a Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:09 +0300 Subject: [PATCH 168/509] clk: si5341: check return value of {devm_}kasprintf() [ Upstream commit 36e4ef82016a2b785cf2317eade77e76699b7bff ] {devm_}kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: 3044a860fd09 ("clk: Add Si5341/Si5340 driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-5-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-si5341.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index baa5e2ad2266..af66097f9ac5 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1685,6 +1685,10 @@ static int si5341_probe(struct i2c_client *client, for (i = 0; i < data->num_synth; ++i) { synth_clock_names[i] = devm_kasprintf(&client->dev, GFP_KERNEL, "%s.N%u", client->dev.of_node->name, i); + if (!synth_clock_names[i]) { + err = -ENOMEM; + goto free_clk_names; + } init.name = synth_clock_names[i]; data->synth[i].index = i; data->synth[i].data = data; @@ -1703,6 +1707,10 @@ static int si5341_probe(struct i2c_client *client, for (i = 0; i < data->num_outputs; ++i) { init.name = kasprintf(GFP_KERNEL, "%s.%d", client->dev.of_node->name, i); + if (!init.name) { + err = -ENOMEM; + goto free_clk_names; + } init.flags = config[i].synth_master ? CLK_SET_RATE_PARENT : 0; data->clk[i].index = i; data->clk[i].data = data; From 0b754f9cfd66314175059c66118a717bf0542284 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:10 +0300 Subject: [PATCH 169/509] clk: si5341: free unused memory on probe failure [ Upstream commit 267ad94b13c53d8c99a336f0841b1fa1595b1d0f ] Pointers from synth_clock_names[] should be freed at the end of probe either on probe success or failure path. Fixes: b7bbf6ec4940 ("clk: si5341: Allow different output VDD_SEL values") Fixes: 9b13ff4340df ("clk: si5341: Add sysfs properties to allow checking/resetting device faults") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-6-claudiu.beznea@microchip.com Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/clk-si5341.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index af66097f9ac5..4dea29fa901d 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1732,7 +1732,7 @@ static int si5341_probe(struct i2c_client *client, if (err) { dev_err(&client->dev, "output %u registration failed\n", i); - goto cleanup; + goto free_clk_names; } if (config[i].always_on) clk_prepare(data->clk[i].hw.clk); @@ -1742,7 +1742,7 @@ static int si5341_probe(struct i2c_client *client, data); if (err) { dev_err(&client->dev, "unable to add clk provider\n"); - goto cleanup; + goto free_clk_names; } if (initialization_required) { @@ -1750,11 +1750,11 @@ static int si5341_probe(struct i2c_client *client, regcache_cache_only(data->regmap, false); err = regcache_sync(data->regmap); if (err < 0) - goto cleanup; + goto free_clk_names; err = si5341_finalize_defaults(data); if (err < 0) - goto cleanup; + goto free_clk_names; } /* wait for device to report input clock present and PLL lock */ @@ -1763,21 +1763,19 @@ static int si5341_probe(struct i2c_client *client, 10000, 250000); if (err) { dev_err(&client->dev, "Error waiting for input clock or PLL lock\n"); - goto cleanup; + goto free_clk_names; } /* clear sticky alarm bits from initialization */ err = regmap_write(data->regmap, SI5341_STATUS_STICKY, 0); if (err) { dev_err(&client->dev, "unable to clear sticky status\n"); - goto cleanup; + goto free_clk_names; } err = sysfs_create_files(&client->dev.kobj, si5341_attributes); - if (err) { + if (err) dev_err(&client->dev, "unable to create sysfs files\n"); - goto cleanup; - } free_clk_names: /* Free the names, clk framework makes copies */ From fd9324fa4d81efe80800da027b084d670adde7b9 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:11 +0300 Subject: [PATCH 170/509] clk: keystone: sci-clk: check return value of kasprintf() [ Upstream commit b73ed981da6d25c921aaefa7ca3df85bbd85b7fc ] kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: b745c0794e2f ("clk: keystone: Add sci-clk driver support") Depends-on: 96488c09b0f4 ("clk: keystone: sci-clk: cut down the clock name length") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-7-claudiu.beznea@microchip.com Reviewed-by: Tony Lindgren Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/keystone/sci-clk.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/keystone/sci-clk.c b/drivers/clk/keystone/sci-clk.c index 7e1b136e71ae..8af2a9faa805 100644 --- a/drivers/clk/keystone/sci-clk.c +++ b/drivers/clk/keystone/sci-clk.c @@ -302,6 +302,8 @@ static int _sci_clk_build(struct sci_clk_provider *provider, name = kasprintf(GFP_KERNEL, "clk:%d:%d", sci_clk->dev_id, sci_clk->clk_id); + if (!name) + return -ENOMEM; init.name = name; From 5f149d053898f8978ad95be8cbb916c0a173f61c Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 30 May 2023 12:39:12 +0300 Subject: [PATCH 171/509] clk: ti: clkctrl: check return value of kasprintf() [ Upstream commit bd46cd0b802d9c9576ca78007aa084ae3e74907b ] kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: 852049594b9a ("clk: ti: clkctrl: convert subclocks to use proper names also") Fixes: 6c3090520554 ("clk: ti: clkctrl: Fix hidden dependency to node name") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230530093913.1656095-8-claudiu.beznea@microchip.com Reviewed-by: Tony Lindgren Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/ti/clkctrl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c index 864c484bde1b..157abc46dcf4 100644 --- a/drivers/clk/ti/clkctrl.c +++ b/drivers/clk/ti/clkctrl.c @@ -267,6 +267,9 @@ static const char * __init clkctrl_get_clock_name(struct device_node *np, if (clkctrl_name && !legacy_naming) { clock_name = kasprintf(GFP_KERNEL, "%s-clkctrl:%04x:%d", clkctrl_name, offset, index); + if (!clock_name) + return NULL; + strreplace(clock_name, '_', '-'); return clock_name; @@ -598,6 +601,10 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node) if (clkctrl_name) { provider->clkdm_name = kasprintf(GFP_KERNEL, "%s_clkdm", clkctrl_name); + if (!provider->clkdm_name) { + kfree(provider); + return; + } goto clkdm_found; } From 47f4d875aa54d613956b14c9a81bf5f8febb3c16 Mon Sep 17 00:00:00 2001 From: Alexey Romanov Date: Sat, 10 Jun 2023 12:04:14 +0300 Subject: [PATCH 172/509] drivers: meson: secure-pwrc: always enable DMA domain [ Upstream commit 0bb4644d583789c97e74d3e3047189f0c59c4742 ] Starting from commit e45f243409db ("firmware: meson_sm: populate platform devices from sm device tree data") pwrc is probed successfully and disables unused pwr domains. By A1 SoC family design, any TEE requires DMA pwr domain always enabled. Fixes: b3dde5013e13 ("soc: amlogic: Add support for Secure power domains controller") Signed-off-by: Alexey Romanov Acked-by: Neil Armstrong Link: https://lore.kernel.org/r/20230610090414.90529-1-avromanov@sberdevices.ru [narmstrong: added fixes tag] Signed-off-by: Neil Armstrong Signed-off-by: Sasha Levin --- drivers/soc/amlogic/meson-secure-pwrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/amlogic/meson-secure-pwrc.c b/drivers/soc/amlogic/meson-secure-pwrc.c index fff92e2f3974..090a32666475 100644 --- a/drivers/soc/amlogic/meson-secure-pwrc.c +++ b/drivers/soc/amlogic/meson-secure-pwrc.c @@ -103,7 +103,7 @@ static struct meson_secure_pwrc_domain_desc a1_pwrc_domains[] = { SEC_PD(ACODEC, 0), SEC_PD(AUDIO, 0), SEC_PD(OTP, 0), - SEC_PD(DMA, 0), + SEC_PD(DMA, GENPD_FLAG_ALWAYS_ON | GENPD_FLAG_IRQ_SAFE), SEC_PD(SD_EMMC, 0), SEC_PD(RAMA, 0), /* SRAMB is used as ATF runtime memory, and should be always on */ From 714ba10a6dd19752a349e59aa875f3288ccb59b9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 3 Apr 2023 11:29:59 +0300 Subject: [PATCH 173/509] ovl: update of dentry revalidate flags after copy up [ Upstream commit b07d5cc93e1b28df47a72c519d09d0a836043613 ] After copy up, we may need to update d_flags if upper dentry is on a remote fs and lower dentries are not. Add helpers to allow incremental update of the revalidate flags. Fixes: bccece1ead36 ("ovl: allow remote upper") Reviewed-by: Gao Xiang Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/copy_up.c | 2 ++ fs/overlayfs/dir.c | 3 +-- fs/overlayfs/export.c | 3 +-- fs/overlayfs/namei.c | 3 +-- fs/overlayfs/overlayfs.h | 6 ++++-- fs/overlayfs/super.c | 2 +- fs/overlayfs/util.c | 24 ++++++++++++++++++++---- 7 files changed, 30 insertions(+), 13 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index e466c58f9ec4..7ef3c87f8a23 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -475,6 +475,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, &c->pstat); ovl_dentry_set_upper_alias(c->dentry); + ovl_dentry_update_reval(c->dentry, upper); } } inode_unlock(udir); @@ -762,6 +763,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) inode_unlock(udir); ovl_dentry_set_upper_alias(c->dentry); + ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } out: diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 8ebd9f2b1c95..a7021c87bfcb 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -266,8 +266,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode, ovl_dir_modified(dentry->d_parent, false); ovl_dentry_set_upper_alias(dentry); - ovl_dentry_update_reval(dentry, newdentry, - DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + ovl_dentry_init_reval(dentry, newdentry); if (!hardlink) { /* diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 44118f0ab0b3..f981283177ec 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -324,8 +324,7 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb, if (upper_alias) ovl_dentry_set_upper_alias(dentry); - ovl_dentry_update_reval(dentry, upper, - DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + ovl_dentry_init_reval(dentry, upper); return d_instantiate_anon(dentry, inode); diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 092812c2f118..ff5284b86bd5 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1095,8 +1095,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, ovl_set_flag(OVL_UPPERDATA, inode); } - ovl_dentry_update_reval(dentry, upperdentry, - DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + ovl_dentry_init_reval(dentry, upperdentry); revert_creds(old_cred); if (origin_path) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 898de3bf884e..26f91868fbda 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -257,8 +257,10 @@ bool ovl_index_all(struct super_block *sb); bool ovl_verify_lower(struct super_block *sb); struct ovl_entry *ovl_alloc_entry(unsigned int numlower); bool ovl_dentry_remote(struct dentry *dentry); -void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, - unsigned int mask); +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry); +void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry); +void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask); bool ovl_dentry_weird(struct dentry *dentry); enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e3cd5a00f880..5d7df839902d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1868,7 +1868,7 @@ static struct dentry *ovl_get_root(struct super_block *sb, ovl_dentry_set_flag(OVL_E_CONNECTED, root); ovl_set_upperdata(d_inode(root)); ovl_inode_init(d_inode(root), &oip, ino, fsid); - ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); + ovl_dentry_init_flags(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); return root; } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index e8b14d2c180c..060f9c99d9b3 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -90,14 +90,30 @@ struct ovl_entry *ovl_alloc_entry(unsigned int numlower) return oe; } +#define OVL_D_REVALIDATE (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE) + bool ovl_dentry_remote(struct dentry *dentry) { - return dentry->d_flags & - (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE); + return dentry->d_flags & OVL_D_REVALIDATE; } -void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *upperdentry, - unsigned int mask) +void ovl_dentry_update_reval(struct dentry *dentry, struct dentry *realdentry) +{ + if (!ovl_dentry_remote(realdentry)) + return; + + spin_lock(&dentry->d_lock); + dentry->d_flags |= realdentry->d_flags & OVL_D_REVALIDATE; + spin_unlock(&dentry->d_lock); +} + +void ovl_dentry_init_reval(struct dentry *dentry, struct dentry *upperdentry) +{ + return ovl_dentry_init_flags(dentry, upperdentry, OVL_D_REVALIDATE); +} + +void ovl_dentry_init_flags(struct dentry *dentry, struct dentry *upperdentry, + unsigned int mask) { struct ovl_entry *oe = OVL_E(dentry); unsigned int i, flags = 0; From 07be8e60f27f095942608c4cc6ff474a4326f6c6 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 14 Jun 2023 15:15:09 +0300 Subject: [PATCH 174/509] ASoC: imx-audmix: check return value of devm_kasprintf() [ Upstream commit 2f76e1d6ca524a888d29aafe29f2ad2003857971 ] devm_kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver") Signed-off-by: Claudiu Beznea Link: https://lore.kernel.org/r/20230614121509.443926-1-claudiu.beznea@microchip.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/imx-audmix.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index cbdc0a2c09c5..77d8234c7ac4 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -230,6 +230,8 @@ static int imx_audmix_probe(struct platform_device *pdev) dai_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s%s", fe_name_pref, args.np->full_name + 1); + if (!dai_name) + return -ENOMEM; dev_info(pdev->dev.parent, "DAI FE name:%s\n", dai_name); @@ -238,6 +240,8 @@ static int imx_audmix_probe(struct platform_device *pdev) capture_dai_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s %s", dai_name, "CPU-Capture"); + if (!capture_dai_name) + return -ENOMEM; } priv->dai[i].cpus = &dlc[0]; @@ -268,6 +272,8 @@ static int imx_audmix_probe(struct platform_device *pdev) "AUDMIX-Playback-%d", i); be_cp = devm_kasprintf(&pdev->dev, GFP_KERNEL, "AUDMIX-Capture-%d", i); + if (!be_name || !be_pb || !be_cp) + return -ENOMEM; priv->dai[num_dai + i].cpus = &dlc[3]; priv->dai[num_dai + i].codecs = &dlc[4]; @@ -295,6 +301,9 @@ static int imx_audmix_probe(struct platform_device *pdev) priv->dapm_routes[i].source = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s %s", dai_name, "CPU-Playback"); + if (!priv->dapm_routes[i].source) + return -ENOMEM; + priv->dapm_routes[i].sink = be_pb; priv->dapm_routes[num_dai + i].source = be_pb; priv->dapm_routes[num_dai + i].sink = be_cp; From 8e9907e9219f3ee160e796cc8f7b8b41874e7dc0 Mon Sep 17 00:00:00 2001 From: Siddharth Vadapalli Date: Wed, 15 Mar 2023 12:38:00 +0530 Subject: [PATCH 175/509] PCI: cadence: Fix Gen2 Link Retraining process [ Upstream commit 0e12f830236928b6fadf40d917a7527f0a048d2f ] The Link Retraining process is initiated to account for the Gen2 defect in the Cadence PCIe controller in J721E SoC. The errata corresponding to this is i2085, documented at: https://www.ti.com/lit/er/sprz455c/sprz455c.pdf The existing workaround implemented for the errata waits for the Data Link initialization to complete and assumes that the link retraining process at the Physical Layer has completed. However, it is possible that the Physical Layer training might be ongoing as indicated by the PCI_EXP_LNKSTA_LT bit in the PCI_EXP_LNKSTA register. Fix the existing workaround, to ensure that the Physical Layer training has also completed, in addition to the Data Link initialization. Link: https://lore.kernel.org/r/20230315070800.1615527-1-s-vadapalli@ti.com Fixes: 4740b969aaf5 ("PCI: cadence: Retrain Link to work around Gen2 training defect") Signed-off-by: Siddharth Vadapalli Signed-off-by: Lorenzo Pieralisi Reviewed-by: Vignesh Raghavendra Signed-off-by: Sasha Levin --- .../controller/cadence/pcie-cadence-host.c | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c index fb96d37a135c..4d8d15ac51ef 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-host.c +++ b/drivers/pci/controller/cadence/pcie-cadence-host.c @@ -12,6 +12,8 @@ #include "pcie-cadence.h" +#define LINK_RETRAIN_TIMEOUT HZ + static u64 bar_max_size[] = { [RP_BAR0] = _ULL(128 * SZ_2G), [RP_BAR1] = SZ_2G, @@ -77,6 +79,27 @@ static struct pci_ops cdns_pcie_host_ops = { .write = pci_generic_config_write, }; +static int cdns_pcie_host_training_complete(struct cdns_pcie *pcie) +{ + u32 pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET; + unsigned long end_jiffies; + u16 lnk_stat; + + /* Wait for link training to complete. Exit after timeout. */ + end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT; + do { + lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA); + if (!(lnk_stat & PCI_EXP_LNKSTA_LT)) + break; + usleep_range(0, 1000); + } while (time_before(jiffies, end_jiffies)); + + if (!(lnk_stat & PCI_EXP_LNKSTA_LT)) + return 0; + + return -ETIMEDOUT; +} + static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie) { struct device *dev = pcie->dev; @@ -118,6 +141,10 @@ static int cdns_pcie_retrain(struct cdns_pcie *pcie) cdns_pcie_rp_writew(pcie, pcie_cap_off + PCI_EXP_LNKCTL, lnk_ctl); + ret = cdns_pcie_host_training_complete(pcie); + if (ret) + return ret; + ret = cdns_pcie_host_wait_for_link(pcie); } return ret; From ac64019e4d4b08c23edb117e0b2590985e33de1d Mon Sep 17 00:00:00 2001 From: Jinhong Zhu Date: Tue, 2 May 2023 22:00:21 +0800 Subject: [PATCH 176/509] scsi: qedf: Fix NULL dereference in error handling [ Upstream commit f025312b089474a54e4859f3453771314d9e3d4f ] Smatch reported: drivers/scsi/qedf/qedf_main.c:3056 qedf_alloc_global_queues() warn: missing unwind goto? At this point in the function, nothing has been allocated so we can return directly. In particular the "qedf->global_queues" have not been allocated so calling qedf_free_global_queues() will lead to a NULL dereference when we check if (!gl[i]) and "gl" is NULL. Fixes: 61d8658b4a43 ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.") Signed-off-by: Jinhong Zhu Link: https://lore.kernel.org/r/20230502140022.2852-1-jinhongzhu@hust.edu.cn Reviewed-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qedf/qedf_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index f48ef47546f4..b33cb1172f31 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -3042,9 +3042,8 @@ static int qedf_alloc_global_queues(struct qedf_ctx *qedf) * addresses of our queues */ if (!qedf->p_cpuq) { - status = -EINVAL; QEDF_ERR(&qedf->dbg_ctx, "p_cpuq is NULL.\n"); - goto mem_alloc_failure; + return -EINVAL; } qedf->global_queues = kzalloc((sizeof(struct global_queue *) From d27238fc83b9042cad13b260a8b736b5431ad722 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 16 Apr 2023 23:43:41 +0200 Subject: [PATCH 177/509] pinctrl: bcm2835: Handle gpiochip_add_pin_range() errors [ Upstream commit cdf7e616120065007687fe1df0412154f259daec ] gpiochip_add_pin_range() can fail, so better return its error code than a hard coded '0'. Fixes: d2b67744fd99 ("pinctrl: bcm2835: implement hook for missing gpio-ranges") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/98c3b5890bb72415145c9fe4e1d974711edae376.1681681402.git.christophe.jaillet@wanadoo.fr Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/bcm/pinctrl-bcm2835.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index c7ae9f900b53..e3f49d0ed029 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -359,10 +359,8 @@ static int bcm2835_of_gpio_ranges_fallback(struct gpio_chip *gc, if (!pctldev) return 0; - gpiochip_add_pin_range(gc, pinctrl_dev_get_devname(pctldev), 0, 0, - gc->ngpio); - - return 0; + return gpiochip_add_pin_range(gc, pinctrl_dev_get_devname(pctldev), 0, 0, + gc->ngpio); } static const struct gpio_chip bcm2835_gpio_chip = { From 7badf4d6f49a358a01ab072bbff88d3ee886c33b Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Sun, 7 May 2023 11:40:57 +0800 Subject: [PATCH 178/509] PCI/ASPM: Disable ASPM on MFD function removal to avoid use-after-free [ Upstream commit 456d8aa37d0f56fc9e985e812496e861dcd6f2f2 ] Struct pcie_link_state->downstream is a pointer to the pci_dev of function 0. Previously we retained that pointer when removing function 0, and subsequent ASPM policy changes dereferenced it, resulting in a use-after-free warning from KASAN, e.g.: # echo 1 > /sys/bus/pci/devices/0000:03:00.0/remove # echo powersave > /sys/module/pcie_aspm/parameters/policy BUG: KASAN: slab-use-after-free in pcie_config_aspm_link+0x42d/0x500 Call Trace: kasan_report+0xae/0xe0 pcie_config_aspm_link+0x42d/0x500 pcie_aspm_set_policy+0x8e/0x1a0 param_attr_store+0x162/0x2c0 module_attr_store+0x3e/0x80 PCIe spec r6.0, sec 7.5.3.7, recommends that software program the same ASPM Control value in all functions of multi-function devices. Disable ASPM and free the pcie_link_state when any child function is removed so we can discard the dangling pcie_link_state->downstream pointer and maintain the same ASPM Control configuration for all functions. [bhelgaas: commit log and comment] Debugged-by: Zongquan Qin Suggested-by: Bjorn Helgaas Fixes: b5a0a9b59c81 ("PCI/ASPM: Read and set up L1 substate capabilities") Link: https://lore.kernel.org/r/20230507034057.20970-1-dinghui@sangfor.com.cn Signed-off-by: Ding Hui Signed-off-by: Bjorn Helgaas Signed-off-by: Sasha Levin --- drivers/pci/pcie/aspm.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index ac0557a305af..51da8ba67d21 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -993,21 +993,24 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) down_read(&pci_bus_sem); mutex_lock(&aspm_lock); - /* - * All PCIe functions are in one slot, remove one function will remove - * the whole slot, so just wait until we are the last function left. - */ - if (!list_empty(&parent->subordinate->devices)) - goto out; link = parent->link_state; root = link->root; parent_link = link->parent; - /* All functions are removed, so just disable ASPM for the link */ + /* + * link->downstream is a pointer to the pci_dev of function 0. If + * we remove that function, the pci_dev is about to be deallocated, + * so we can't use link->downstream again. Free the link state to + * avoid this. + * + * If we're removing a non-0 function, it's possible we could + * retain the link state, but PCIe r6.0, sec 7.5.3.7, recommends + * programming the same ASPM Control value for all functions of + * multi-function devices, so disable ASPM for all of them. + */ pcie_config_aspm_link(link, 0); list_del(&link->sibling); - /* Clock PM is for endpoint device */ free_link_state(link); /* Recheck latencies and configure upstream links */ @@ -1015,7 +1018,7 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) pcie_update_aspm_capable(root); pcie_config_aspm_path(parent_link); } -out: + mutex_unlock(&aspm_lock); up_read(&pci_bus_sem); } From b9895a4c95f3db8eaeb073eee29f7efe47d3f3d5 Mon Sep 17 00:00:00 2001 From: Yuchen Yang Date: Fri, 5 May 2023 22:12:55 +0800 Subject: [PATCH 179/509] scsi: 3w-xxxx: Add error handling for initialization failure in tw_probe() [ Upstream commit 2e2fe5ac695a00ab03cab4db1f4d6be07168ed9d ] Smatch complains that: tw_probe() warn: missing error code 'retval' This patch adds error checking to tw_probe() to handle initialization failure. If tw_reset_sequence() function returns a non-zero value, the function will return -EINVAL to indicate initialization failure. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yuchen Yang Link: https://lore.kernel.org/r/20230505141259.7730-1-u202114568@hust.edu.cn Reviewed-by: Dan Carpenter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/3w-xxxx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c index fb6444d0409c..211a25351e7d 100644 --- a/drivers/scsi/3w-xxxx.c +++ b/drivers/scsi/3w-xxxx.c @@ -2308,8 +2308,10 @@ static int tw_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) TW_DISABLE_INTERRUPTS(tw_dev); /* Initialize the card */ - if (tw_reset_sequence(tw_dev)) + if (tw_reset_sequence(tw_dev)) { + retval = -EINVAL; goto out_release_mem_region; + } /* Set host specific parameters */ host->max_id = TW_MAX_UNITS; From cb389e8edf64ca41e6b601f1e3b9b39b6b05569a Mon Sep 17 00:00:00 2001 From: Rongguang Wei Date: Fri, 12 May 2023 10:15:18 +0800 Subject: [PATCH 180/509] PCI: pciehp: Cancel bringup sequence if card is not present [ Upstream commit e8afd0d9fccc27c8ad263db5cf5952cfcf72d6fe ] If a PCIe hotplug slot has an Attention Button, the normal hot-add flow is: - Slot is empty and slot power is off - User inserts card in slot and presses Attention Button - OS blinks Power Indicator for 5 seconds - After 5 seconds, OS turns on Power Indicator, turns on slot power, and enumerates the device Previously, if a user pressed the Attention Button on an *empty* slot, pciehp logged the following messages and blinked the Power Indicator until a second button press: [0.000] pciehp: Button press: will power on in 5 sec [0.001] # Power Indicator starts blinking [5.001] # 5 second timeout; slot is empty, so we should cancel the request to power on and turn off Power Indicator [7.000] # Power Indicator still blinking [8.000] # possible card insertion [9.000] pciehp: Button press: canceling request to power on The first button press incorrectly left the slot in BLINKINGON_STATE, so the second was interpreted as a "cancel power on" event regardless of whether a card was present. If the slot is empty, turn off the Power Indicator and return from BLINKINGON_STATE to OFF_STATE after 5 seconds, effectively canceling the request to power on. Putting the slot in OFF_STATE also means the second button press will correctly request a slot power on if the slot is occupied. [bhelgaas: commit log] Link: https://lore.kernel.org/r/20230512021518.336460-1-clementwei90@163.com Fixes: d331710ea78f ("PCI: pciehp: Become resilient to missed events") Suggested-by: Lukas Wunner Signed-off-by: Rongguang Wei Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Signed-off-by: Sasha Levin --- drivers/pci/hotplug/pciehp_ctrl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 529c34808440..32baba1b7f13 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -256,6 +256,14 @@ void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events) present = pciehp_card_present(ctrl); link_active = pciehp_check_link_active(ctrl); if (present <= 0 && link_active <= 0) { + if (ctrl->state == BLINKINGON_STATE) { + ctrl->state = OFF_STATE; + cancel_delayed_work(&ctrl->button_work); + pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, + INDICATOR_NOOP); + ctrl_info(ctrl, "Slot(%s): Card not present\n", + slot_name(ctrl)); + } mutex_unlock(&ctrl->state_lock); return; } From b65fe59b2d623c7008c3a69a1064f0759d9f2b3e Mon Sep 17 00:00:00 2001 From: Junyan Ye Date: Mon, 8 May 2023 12:36:41 +0800 Subject: [PATCH 181/509] PCI: ftpci100: Release the clock resources [ Upstream commit c60738de85f40b0b9f5cb23c21f9246e5a47908c ] Smatch reported: 1. drivers/pci/controller/pci-ftpci100.c:526 faraday_pci_probe() warn: 'clk' from clk_prepare_enable() not released on lines: 442,451,462,478,512,517. 2. drivers/pci/controller/pci-ftpci100.c:526 faraday_pci_probe() warn: 'p->bus_clk' from clk_prepare_enable() not released on lines: 451,462,478,512,517. The clock resource is obtained by devm_clk_get(), and then clk_prepare_enable() makes the clock resource ready for use. After that, clk_disable_unprepare() should be called to release the clock resource when it is no longer needed. However, while doing some error handling in faraday_pci_probe(), clk_disable_unprepare() is not called to release clk and p->bus_clk before returning. These return lines are exactly 442, 451, 462, 478, 512, 517. Fix this warning by replacing devm_clk_get() with devm_clk_get_enabled(), which is equivalent to devm_clk_get() + clk_prepare_enable(). And with devm_clk_get_enabled(), the clock will automatically be disabled, unprepared and freed when the device is unbound from the bus. Link: https://lore.kernel.org/r/20230508043641.23807-1-yejunyan@hust.edu.cn Fixes: b3c433efb8a3 ("PCI: faraday: Fix wrong pointer passed to PTR_ERR()") Fixes: 2eeb02b28579 ("PCI: faraday: Add clock handling") Fixes: 783a862563f7 ("PCI: faraday: Use pci_parse_request_of_pci_ranges()") Fixes: d3c68e0a7e34 ("PCI: faraday: Add Faraday Technology FTPCI100 PCI Host Bridge driver") Fixes: f1e8bd21e39e ("PCI: faraday: Convert IRQ masking to raw PCI config accessors") Signed-off-by: Junyan Ye Signed-off-by: Lorenzo Pieralisi Reviewed-by: Dongliang Mu Reviewed-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pci/controller/pci-ftpci100.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c index aefef1986201..80cfea5d9f12 100644 --- a/drivers/pci/controller/pci-ftpci100.c +++ b/drivers/pci/controller/pci-ftpci100.c @@ -442,22 +442,12 @@ static int faraday_pci_probe(struct platform_device *pdev) p->dev = dev; /* Retrieve and enable optional clocks */ - clk = devm_clk_get(dev, "PCLK"); + clk = devm_clk_get_enabled(dev, "PCLK"); if (IS_ERR(clk)) return PTR_ERR(clk); - ret = clk_prepare_enable(clk); - if (ret) { - dev_err(dev, "could not prepare PCLK\n"); - return ret; - } - p->bus_clk = devm_clk_get(dev, "PCICLK"); + p->bus_clk = devm_clk_get_enabled(dev, "PCICLK"); if (IS_ERR(p->bus_clk)) return PTR_ERR(p->bus_clk); - ret = clk_prepare_enable(p->bus_clk); - if (ret) { - dev_err(dev, "could not prepare PCICLK\n"); - return ret; - } p->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(p->base)) From f0d2310f6b46c63b78556c94811d754364180ea2 Mon Sep 17 00:00:00 2001 From: Sui Jingfeng Date: Wed, 31 May 2023 18:27:44 +0800 Subject: [PATCH 182/509] PCI: Add pci_clear_master() stub for non-CONFIG_PCI [ Upstream commit 2aa5ac633259843f656eb6ecff4cf01e8e810c5e ] Add a pci_clear_master() stub when CONFIG_PCI is not set so drivers that support both PCI and platform devices don't need #ifdefs or extra Kconfig symbols for the PCI parts. [bhelgaas: commit log] Fixes: 6a479079c072 ("PCI: Add pci_clear_master() as opposite of pci_set_master()") Link: https://lore.kernel.org/r/20230531102744.2354313-1-suijingfeng@loongson.cn Signed-off-by: Sui Jingfeng Signed-off-by: Bjorn Helgaas Reviewed-by: Geert Uytterhoeven Signed-off-by: Sasha Levin --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index 4cc42ad2f6c5..550e1cdb473f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1719,6 +1719,7 @@ static inline struct pci_dev *pci_get_class(unsigned int class, #define pci_dev_put(dev) do { } while (0) static inline void pci_set_master(struct pci_dev *dev) { } +static inline void pci_clear_master(struct pci_dev *dev) { } static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } static inline int pcim_enable_device(struct pci_dev *pdev) { return -EIO; } From 345ee8521655e69f63a01376572745431f461714 Mon Sep 17 00:00:00 2001 From: Sohaib Mohamed Date: Fri, 19 Nov 2021 08:14:08 +0200 Subject: [PATCH 183/509] perf bench: Use unbuffered output when pipe/tee'ing to a file [ Upstream commit f0a29c9647ff8bbb424641f79bc1894e83dec218 ] The output of 'perf bench' gets buffered when I pipe it to a file or to tee, in such a way that I can see it only at the end. E.g. $ perf bench internals synthesize -t < output comes out fine after each test run > $ perf bench internals synthesize -t | tee file.txt < output comes out only at the end of all tests > This patch resolves this issue for 'bench' and 'test' subcommands. See, also: $ perf bench mem all | tee file.txt $ perf bench sched all | tee file.txt $ perf bench internals all -t | tee file.txt $ perf bench internals all | tee file.txt Committer testing: It really gets staggered, i.e. outputs in bursts, when the buffer fills up and has to be drained to make up space for more output. Suggested-by: Riccardo Mancini Signed-off-by: Sohaib Mohamed Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211119061409.78004-1-sohaib.amhmd@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 16203e9cd018 ("perf bench: Add missing setlocale() call to allow usage of %'d style formatting") Signed-off-by: Sasha Levin --- tools/perf/builtin-bench.c | 5 +++-- tools/perf/tests/builtin-test.c | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 62a7b7420a44..609a941ae296 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -225,7 +225,6 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); - fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -246,6 +245,9 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + if (argc < 2) { /* No collection specified. */ print_usage(); @@ -299,7 +301,6 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); - fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 132bdb3e6c31..73c911dd0c2c 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -793,6 +793,9 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); From 4b63caf86edacd79bec6bfb09f9aaae0433566d3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Jun 2023 15:38:25 -0300 Subject: [PATCH 184/509] perf bench: Add missing setlocale() call to allow usage of %'d style formatting [ Upstream commit 16203e9cd01896b4244100a8e3fb9f6e612ab2b1 ] Without this we were not getting the thousands separator for big numbers. Noticed while developing 'perf bench uprobe', but the use of %' predates that, for instance 'perf bench syscall' uses it. Before: # perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1000 usleep(1000) calls Total time: 1054082243ns 1054082.243000 nsecs/op # After: # perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,053,715,144ns 1,053,715.144000 nsecs/op # Fixes: c2a08203052f8975 ("perf bench: Add basic syscall benchmark") Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Davidlohr Bueso Cc: Derek Barbosa Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tiezhu Yang Link: https://lore.kernel.org/lkml/ZH3lcepZ4tBYr1jv@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-bench.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 609a941ae296..fb3029495c23 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -21,6 +21,7 @@ #include "builtin.h" #include "bench/bench.h" +#include #include #include #include @@ -247,6 +248,7 @@ int cmd_bench(int argc, const char **argv) /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); + setlocale(LC_ALL, ""); if (argc < 2) { /* No collection specified. */ From 5533f0eb0a2935ca612fe3357b593e9bb45d05cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2023 17:37:34 +0300 Subject: [PATCH 185/509] pinctrl: cherryview: Return correct value if pin in push-pull mode [ Upstream commit 5835196a17be5cfdcad0b617f90cf4abe16951a4 ] Currently the getter returns ENOTSUPP on pin configured in the push-pull mode. Fix this by adding the missed switch case. Fixes: ccdf81d08dbe ("pinctrl: cherryview: add option to set open-drain pin config") Fixes: 6e08d6bbebeb ("pinctrl: Add Intel Cherryview/Braswell pin controller support") Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-cherryview.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 2ed17cdf946d..44caada37b71 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -945,11 +945,6 @@ static int chv_config_get(struct pinctrl_dev *pctldev, unsigned int pin, break; - case PIN_CONFIG_DRIVE_OPEN_DRAIN: - if (!(ctrl1 & CHV_PADCTRL1_ODEN)) - return -EINVAL; - break; - case PIN_CONFIG_BIAS_HIGH_IMPEDANCE: { u32 cfg; @@ -959,6 +954,16 @@ static int chv_config_get(struct pinctrl_dev *pctldev, unsigned int pin, return -EINVAL; break; + + case PIN_CONFIG_DRIVE_PUSH_PULL: + if (ctrl1 & CHV_PADCTRL1_ODEN) + return -EINVAL; + break; + + case PIN_CONFIG_DRIVE_OPEN_DRAIN: + if (!(ctrl1 & CHV_PADCTRL1_ODEN)) + return -EINVAL; + break; } default: From 958acb479ef21599a85f87f70eb07f38a8363568 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 May 2023 17:31:17 +0200 Subject: [PATCH 186/509] kcsan: Don't expect 64 bits atomic builtins from 32 bits architectures [ Upstream commit 353e7300a1db928e427462f2745f9a2cd1625b3d ] Activating KCSAN on a 32 bits architecture leads to the following link-time failure: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_load': kernel/kcsan/core.c:1273: undefined reference to `__atomic_load_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_store': kernel/kcsan/core.c:1273: undefined reference to `__atomic_store_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_exchange': kernel/kcsan/core.c:1273: undefined reference to `__atomic_exchange_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_add': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_add_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_sub': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_sub_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_and': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_and_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_or': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_or_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_xor': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_xor_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_fetch_nand': kernel/kcsan/core.c:1273: undefined reference to `__atomic_fetch_nand_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_compare_exchange_strong': kernel/kcsan/core.c:1273: undefined reference to `__atomic_compare_exchange_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_compare_exchange_weak': kernel/kcsan/core.c:1273: undefined reference to `__atomic_compare_exchange_8' powerpc64-linux-ld: kernel/kcsan/core.o: in function `__tsan_atomic64_compare_exchange_val': kernel/kcsan/core.c:1273: undefined reference to `__atomic_compare_exchange_8' 32 bits architectures don't have 64 bits atomic builtins. Only include DEFINE_TSAN_ATOMIC_OPS(64) on 64 bits architectures. Fixes: 0f8ad5f2e934 ("kcsan: Add support for atomic builtins") Suggested-by: Marco Elver Signed-off-by: Christophe Leroy Reviewed-by: Marco Elver Acked-by: Marco Elver Signed-off-by: Michael Ellerman Link: https://msgid.link/d9c6afc28d0855240171a4e0ad9ffcdb9d07fceb.1683892665.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- kernel/kcsan/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 762df6108c58..473dc04591b8 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -1035,7 +1035,9 @@ EXPORT_SYMBOL(__tsan_init); DEFINE_TSAN_ATOMIC_OPS(8); DEFINE_TSAN_ATOMIC_OPS(16); DEFINE_TSAN_ATOMIC_OPS(32); +#ifdef CONFIG_64BIT DEFINE_TSAN_ATOMIC_OPS(64); +#endif void __tsan_atomic_thread_fence(int memorder); void __tsan_atomic_thread_fence(int memorder) From 647c6d35ccfe47c8344a9c09e819e1fb09eb69cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2021 08:59:21 -0300 Subject: [PATCH 187/509] perf script: Fixup 'struct evsel_script' method prefix [ Upstream commit 297e69bfa4c7aa27259dd456af1377e868337043 ] They all operate on 'struct evsel_script' instances, so should be prefixed with evsel_script__, not with perf_evsel_script__. Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 36d3e4138e1b ("perf script: Fix allocation of evsel->priv related to per-event dump files") Signed-off-by: Sasha Levin --- tools/perf/builtin-script.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5109d01619ee..5651714e527c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -295,8 +295,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel) return (struct evsel_script *)evsel->priv; } -static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, - struct perf_data *data) +static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); @@ -316,7 +315,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, return NULL; } -static void perf_evsel_script__delete(struct evsel_script *es) +static void evsel_script__delete(struct evsel_script *es) { zfree(&es->filename); fclose(es->fp); @@ -324,7 +323,7 @@ static void perf_evsel_script__delete(struct evsel_script *es) free(es); } -static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) +static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) { struct stat st; @@ -2166,8 +2165,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { - evsel->priv = perf_evsel_script__new(evsel, - scr->session->data); + evsel->priv = evsel_script__new(evsel, scr->session->data); } else { es = zalloc(sizeof(*es)); if (!es) @@ -2422,7 +2420,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - perf_evsel_script__delete(evsel->priv); + evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -2442,7 +2440,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) if (evsel->priv != NULL) continue; - evsel->priv = perf_evsel_script__new(evsel, script->session->data); + evsel->priv = evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2477,8 +2475,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) evlist__for_each_entry(script->session->evlist, evsel) { struct evsel_script *es = evsel->priv; - perf_evsel_script__fprintf(es, stdout); - perf_evsel_script__delete(es); + evsel_script__fprintf(es, stdout); + evsel_script__delete(es); evsel->priv = NULL; } } From 75a3cb1e2317ade7c2918bcedccf40c868da2719 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Jun 2023 16:11:10 -0300 Subject: [PATCH 188/509] perf script: Fix allocation of evsel->priv related to per-event dump files [ Upstream commit 36d3e4138e1b6cc9ab179f3f397b5548f8b1eaae ] When printing output we may want to generate per event files, where the --per-event-dump option should be used, creating perf.data.EVENT.dump files instead of printing to stdout. The callback thar processes event thus expects that evsel->priv->fp should point to either the per-event FILE descriptor or to stdout. The a3af66f51bd0bca7 ("perf script: Fix crash because of missing evsel->priv") changeset fixed a case where evsel->priv wasn't setup, thus set to NULL, causing a segfault when trying to access evsel->priv->fp. But it did it for the non --per-event-dump case by allocating a 'struct perf_evsel_script' just to set its ->fp to stdout. Since evsel->priv is only freed when --per-event-dump is used, we ended up with a memory leak, detected using ASAN. Fix it by using the same method as perf_script__setup_per_event_dump(), and reuse that static 'struct perf_evsel_script'. Also check if evsel_script__new() failed. Fixes: a3af66f51bd0bca7 ("perf script: Fix crash because of missing evsel->priv") Reported-by: Ian Rogers Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ravi Bangoria Link: https://lore.kernel.org/lkml/ZH+F0wGAWV14zvMP@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-script.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5651714e527c..85befbacb2a4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2146,6 +2146,9 @@ static int process_sample_event(struct perf_tool *tool, return 0; } +// Used when scr->per_event_dump is not set +static struct evsel_script es_stdout; + static int process_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist) { @@ -2154,7 +2157,6 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct evsel *evsel, *pos; u64 sample_type; int err; - static struct evsel_script *es; err = perf_event__process_attr(tool, event, pevlist); if (err) @@ -2164,14 +2166,13 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, evsel = evlist__last(*pevlist); if (!evsel->priv) { - if (scr->per_event_dump) { + if (scr->per_event_dump) { evsel->priv = evsel_script__new(evsel, scr->session->data); - } else { - es = zalloc(sizeof(*es)); - if (!es) + if (!evsel->priv) return -ENOMEM; - es->fp = stdout; - evsel->priv = es; + } else { // Replicate what is done in perf_script__setup_per_event_dump() + es_stdout.fp = stdout; + evsel->priv = &es_stdout; } } @@ -2455,7 +2456,6 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) static int perf_script__setup_per_event_dump(struct perf_script *script) { struct evsel *evsel; - static struct evsel_script es_stdout; if (script->per_event_dump) return perf_script__fopen_per_event_dump(script); From b7a38fc3f384932c50c712e8b0f81310939a1ba0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Jun 2023 16:41:01 -0700 Subject: [PATCH 189/509] perf dwarf-aux: Fix off-by-one in die_get_varname() [ Upstream commit 3abfcfd847717d232e36963f31a361747c388fe7 ] The die_get_varname() returns "(unknown_type)" string if it failed to find a type for the variable. But it had a space before the opening parenthesis and it made the closing parenthesis cut off due to the off-by-one in the string length (14). Signed-off-by: Namhyung Kim Fixes: 88fd633cdfa19060 ("perf probe: No need to use formatting strbuf method") Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230612234102.3909116-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f8a10d5148f6..443374a77c8d 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1081,7 +1081,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - ret = strbuf_add(buf, " (unknown_type)", 14); + ret = strbuf_add(buf, "(unknown_type)", 14); } return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); From f3c7b95c9991dab02e616fc251b6c3516e0bd0ac Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 15 Jun 2023 13:53:33 +0300 Subject: [PATCH 190/509] pinctrl: at91-pio4: check return value of devm_kasprintf() [ Upstream commit f6fd5d4ff8ca0b24cee1af4130bcb1fa96b61aa0 ] devm_kasprintf() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller") Depends-on: 1c4e5c470a56 ("pinctrl: at91: use devm_kasprintf() to avoid potential leaks") Depends-on: 5a8f9cf269e8 ("pinctrl: at91-pio4: use proper format specifier for unsigned int") Signed-off-by: Claudiu Beznea Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230615105333.585304-4-claudiu.beznea@microchip.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-at91-pio4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index 315a6c4d9ade..bf8aa0ea35d1 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -1083,6 +1083,8 @@ static int atmel_pinctrl_probe(struct platform_device *pdev) /* Pin naming convention: P(bank_name)(bank_pin_number). */ pin_desc[i].name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "P%c%d", bank + 'A', line); + if (!pin_desc[i].name) + return -ENOMEM; group->name = group_names[i] = pin_desc[i].name; group->pin = pin_desc[i].number; From 07c19c0ad4b07f4b598da369714de028f6a6a323 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Jun 2023 10:58:49 +0100 Subject: [PATCH 191/509] powerpc/powernv/sriov: perform null check on iov before dereferencing iov [ Upstream commit f4f913c980bc6abe0ccfe88fe3909c125afe4a2d ] Currently pointer iov is being dereferenced before the null check of iov which can lead to null pointer dereference errors. Fix this by moving the iov null check before the dereferencing. Detected using cppcheck static analysis: linux/arch/powerpc/platforms/powernv/pci-sriov.c:597:12: warning: Either the condition '!iov' is redundant or there is possible null pointer dereference: iov. [nullPointerRedundantCheck] num_vfs = iov->num_vfs; ^ Fixes: 052da31d45fc ("powerpc/powernv/sriov: De-indent setup and teardown") Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20230608095849.1147969-1-colin.i.king@gmail.com Signed-off-by: Sasha Levin --- arch/powerpc/platforms/powernv/pci-sriov.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c index 28aac933a439..e3e52ff2cbf5 100644 --- a/arch/powerpc/platforms/powernv/pci-sriov.c +++ b/arch/powerpc/platforms/powernv/pci-sriov.c @@ -600,12 +600,12 @@ static void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_iov_data *iov; iov = pnv_iov_get(pdev); - num_vfs = iov->num_vfs; - base_pe = iov->vf_pe_arr[0].pe_number; - if (WARN_ON(!iov)) return; + num_vfs = iov->num_vfs; + base_pe = iov->vf_pe_arr[0].pe_number; + /* Release VF PEs */ pnv_ioda_release_vf_PE(pdev); From bfad11018806e5e33b7b20d2d046bde699309457 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Jul 2021 18:09:53 -0700 Subject: [PATCH 192/509] mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t * [ Upstream commit 9cf6fa2458443118b84090aa1bf7a3630b5940e8 ] No functional change in this patch. [aneesh.kumar@linux.ibm.com: fix] Link: https://lkml.kernel.org/r/87wnqtnb60.fsf@linux.ibm.com [sfr@canb.auug.org.au: another fix] Link: https://lkml.kernel.org/r/20210619134410.89559-1-aneesh.kumar@linux.ibm.com Link: https://lkml.kernel.org/r/20210615110859.320299-1-aneesh.kumar@linux.ibm.com Link: https://lore.kernel.org/linuxppc-dev/CAHk-=wi+J+iodze9FtjM3Zi4j4OeS+qqbKxME9QN4roxPEXH9Q@mail.gmail.com/ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Stephen Rothwell Cc: Christophe Leroy Cc: Hugh Dickins Cc: Joel Fernandes Cc: Kalesh Singh Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 0da90af431ab ("powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo") Signed-off-by: Sasha Levin --- arch/alpha/include/asm/pgtable.h | 8 +++++--- arch/arm/include/asm/pgtable-3level.h | 2 +- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/ia64/include/asm/pgtable.h | 2 +- arch/m68k/include/asm/motorola_pgtable.h | 2 +- arch/mips/include/asm/pgtable-64.h | 4 ++-- arch/parisc/include/asm/pgtable.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgtable.h | 6 +++++- arch/powerpc/include/asm/nohash/64/pgtable.h | 6 +++++- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 2 +- arch/riscv/include/asm/pgtable-64.h | 4 ++-- arch/sh/include/asm/pgtable-3level.h | 4 ++-- arch/sparc/include/asm/pgtable_32.h | 6 +++--- arch/sparc/include/asm/pgtable_64.h | 6 +++--- arch/um/include/asm/pgtable-3level.h | 2 +- arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/mm/pat/set_memory.c | 4 ++-- arch/x86/mm/pgtable.c | 2 +- include/asm-generic/pgtable-nopmd.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- include/linux/pgtable.h | 2 +- 22 files changed, 46 insertions(+), 36 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 660b14ce1317..12c120e436a2 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -241,8 +241,10 @@ pmd_page_vaddr(pmd_t pmd) #define pud_page(pud) (mem_map + ((pud_val(pud) & _PFN_MASK) >> 32)) #endif -extern inline unsigned long pud_page_vaddr(pud_t pgd) -{ return PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } +extern inline pmd_t *pud_pgtable(pud_t pgd) +{ + return (pmd_t *)(PAGE_OFFSET + ((pud_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT))); +} extern inline int pte_none(pte_t pte) { return !pte_val(pte); } extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } @@ -292,7 +294,7 @@ extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; retu /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) { - pmd_t *ret = (pmd_t *) pud_page_vaddr(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + pmd_t *ret = pud_pgtable(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); smp_rmb(); /* see above */ return ret; } diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2b85d175e999..4487aea88477 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -130,7 +130,7 @@ flush_pmd_entry(pudp); \ } while (0) -static inline pmd_t *pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3f74db7b0a31..3635d48ada17 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -633,9 +633,9 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) return __pud_to_phys(pud); } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_page_paddr(pud)); + return (pmd_t *)__va(pud_page_paddr(pud)); } /* Find an entry in the second-level page table. */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 9f64fdfbf275..fd92792d148b 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -279,7 +279,7 @@ extern unsigned long VMALLOC_END; #define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) #define pud_present(pud) (pud_val(pud) != 0UL) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & _PFN_MASK)) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & _PFN_MASK)) #define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) #if CONFIG_PGTABLE_LEVELS == 4 diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 8076467eff4b..956c80874f98 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -129,7 +129,7 @@ static inline void pud_set(pud_t *pudp, pmd_t *pmdp) #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK)) #define pmd_page_vaddr(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK)) -#define pud_page_vaddr(pud) ((unsigned long)__va(pud_val(pud) & _TABLE_MASK)) +#define pud_pgtable(pud) ((pmd_t *)__va(pud_val(pud) & _TABLE_MASK)) #define pte_none(pte) (!pte_val(pte)) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 1e7d6ce9d8d6..ab305453e90f 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -314,9 +314,9 @@ static inline void pud_clear(pud_t *pudp) #endif #ifndef __PAGETABLE_PMD_FOLDED -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return pud_val(pud); + return (pmd_t *)pud_val(pud); } #define pud_phys(pud) virt_to_phys((void *)pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 8964798b8274..ade591927cbf 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -330,8 +330,8 @@ static inline void pmd_clear(pmd_t *pmd) { #if CONFIG_PGTABLE_LEVELS == 3 -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_address(pud))) -#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_address(pud))) +#define pud_page(pud) virt_to_page((void *)pud_pgtable(pud)) /* For 64 bit we have three level tables */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 71e2c524f1ee..5ebf6450f6da 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1030,9 +1030,13 @@ extern struct page *p4d_page(p4d_t p4d); /* Pointers in the page table tree are physical addresses */ #define __pgtable_ptr_val(ptr) __pa(ptr) -#define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) #define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)__va(pud_val(pud) & ~PUD_MASKED_BITS); +} + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 1eacff0fff02..a4d475c0fc2c 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -164,7 +164,11 @@ static inline void pud_clear(pud_t *pudp) #define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \ || (pud_val(pud) & PUD_BAD_BITS)) #define pud_present(pud) (pud_val(pud) != 0) -#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS) + +static inline pmd_t *pud_pgtable(pud_t pud) +{ + return (pmd_t *)(pud_val(pud) & ~PUD_MASKED_BITS); +} extern struct page *pud_page(pud_t pud); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 5f0a2fa611fa..605c770dd819 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -864,7 +864,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, continue; } - pmd_base = (pmd_t *)pud_page_vaddr(*pud); + pmd_base = pud_pgtable(*pud); remove_pmd_table(pmd_base, addr, next); free_pmd_table(pmd_base, pud); } @@ -1156,7 +1156,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) pmd_t *pmd; int i; - pmd = (pmd_t *)pud_page_vaddr(*pud); + pmd = pud_pgtable(*pud); pud_clear(pud); flush_tlb_kernel_range(addr, addr + PUD_SIZE); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index aefc2bfdf104..bd0d903196d9 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -117,7 +117,7 @@ struct page *pud_page(pud_t pud) VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); } - return virt_to_page(pud_page_vaddr(pud)); + return virt_to_page(pud_pgtable(pud)); } /* diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index f3b0da64c6c8..0e863f3f7187 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -60,9 +60,9 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); + return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); } static inline struct page *pud_page(pud_t pud) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 82d74472dfcd..56bf35c2f29c 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -32,9 +32,9 @@ typedef struct { unsigned long long pmd; } pmd_t; #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return pud_val(pud); + return (pmd_t *)pud_val(pud); } /* only used by the stubbed out hugetlb gup code, should never be called */ diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 632cdb959542..7d1d10a8fd93 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -152,13 +152,13 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) return (unsigned long)__nocache_va(v << 4); } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { if (srmmu_device_memory(pud_val(pud))) { - return ~0; + return (pmd_t *)~0; } else { unsigned long v = pud_val(pud) & SRMMU_PTD_PMASK; - return (unsigned long)__nocache_va(v << 4); + return (pmd_t *)__nocache_va(v << 4); } } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7ef6affa105e..cac02ac301f1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -845,18 +845,18 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) return ((unsigned long) __va(pfn << PAGE_SHIFT)); } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { pte_t pte = __pte(pud_val(pud)); unsigned long pfn; pfn = pte_pfn(pte); - return ((unsigned long) __va(pfn << PAGE_SHIFT)); + return ((pmd_t *) __va(pfn << PAGE_SHIFT)); } #define pmd_page(pmd) virt_to_page((void *)pmd_page_vaddr(pmd)) -#define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) +#define pud_page(pud) virt_to_page((void *)pud_pgtable(pud)) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 7e6a4180db9d..091bff319ccd 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -84,7 +84,7 @@ static inline void pud_clear (pud_t *pud) } #define pud_page(pud) phys_to_page(pud_val(pud) & PAGE_MASK) -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & PAGE_MASK)) static inline unsigned long pte_pfn(pte_t pte) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 87de9f2d71cf..a90f6d02fb96 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -865,9 +865,9 @@ static inline int pud_present(pud_t pud) return pud_flags(pud) & _PAGE_PRESENT; } -static inline unsigned long pud_page_vaddr(pud_t pud) +static inline pmd_t *pud_pgtable(pud_t pud) { - return (unsigned long)__va(pud_val(pud) & pud_pfn_mask(pud)); + return (pmd_t *)__va(pud_val(pud) & pud_pfn_mask(pud)); } /* diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 40baa90e74f4..217dda690ed8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1126,7 +1126,7 @@ static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, unsigned long start, unsigned long end) { if (unmap_pte_range(pmd, start, end)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (try_to_free_pmd_page(pud_pgtable(*pud))) pud_clear(pud); } @@ -1170,7 +1170,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) * Try again to free the PMD page if haven't succeeded above. */ if (!pud_none(*pud)) - if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + if (try_to_free_pmd_page(pud_pgtable(*pud))) pud_clear(pud); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index f6a9e2e36642..204b25ee26f0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -805,7 +805,7 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) pte_t *pte; int i; - pmd = (pmd_t *)pud_page_vaddr(*pud); + pmd = pud_pgtable(*pud); pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL); if (!pmd_sv) return 0; diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 3e13acd019ae..10789cf51d16 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -51,7 +51,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) #define __pmd(x) ((pmd_t) { __pud(x) } ) #define pud_page(pud) (pmd_page((pmd_t){ pud })) -#define pud_page_vaddr(pud) (pmd_page_vaddr((pmd_t){ pud })) +#define pud_pgtable(pud) ((pmd_t *)(pmd_page_vaddr((pmd_t){ pud }))) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index a9d751fbda9e..7cbd15f70bf5 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -49,7 +49,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) #define __pud(x) ((pud_t) { __p4d(x) }) #define p4d_page(p4d) (pud_page((pud_t){ p4d })) -#define p4d_page_vaddr(p4d) (pud_page_vaddr((pud_t){ p4d })) +#define p4d_page_vaddr(p4d) (pud_pgtable((pud_t){ p4d })) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 9def1ac19546..f8570799bc26 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -89,7 +89,7 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); + return pud_pgtable(*pud) + pmd_index(address); } #define pmd_offset pmd_offset #endif From 7289ca7a51704e6c3f8d30cbd8ed980637e1267a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 7 Jul 2021 18:09:56 -0700 Subject: [PATCH 193/509] mm: rename p4d_page_vaddr to p4d_pgtable and make it return pud_t * [ Upstream commit dc4875f0e791de554bdc45aa1dbd6e45e107e50f ] No functional change in this patch. [aneesh.kumar@linux.ibm.com: m68k build error reported by kernel robot] Link: https://lkml.kernel.org/r/87tulxnb2v.fsf@linux.ibm.com Link: https://lkml.kernel.org/r/20210615110859.320299-2-aneesh.kumar@linux.ibm.com Link: https://lore.kernel.org/linuxppc-dev/CAHk-=wi+J+iodze9FtjM3Zi4j4OeS+qqbKxME9QN4roxPEXH9Q@mail.gmail.com/ Signed-off-by: Aneesh Kumar K.V Cc: Christophe Leroy Cc: Hugh Dickins Cc: Joel Fernandes Cc: Kalesh Singh Cc: Kirill A. Shutemov Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: 0da90af431ab ("powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo") Signed-off-by: Sasha Levin --- arch/arm64/include/asm/pgtable.h | 4 ++-- arch/ia64/include/asm/pgtable.h | 2 +- arch/mips/include/asm/pgtable-64.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgtable.h | 5 ++++- arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 6 +++++- arch/powerpc/mm/book3s64/radix_pgtable.c | 2 +- arch/powerpc/mm/pgtable_64.c | 2 +- arch/sparc/include/asm/pgtable_64.h | 4 ++-- arch/x86/include/asm/pgtable.h | 4 ++-- arch/x86/mm/init_64.c | 4 ++-- include/asm-generic/pgtable-nop4d.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- include/linux/pgtable.h | 2 +- 13 files changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3635d48ada17..4eedfd784cf6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -694,9 +694,9 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) return __p4d_to_phys(p4d); } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_page_paddr(p4d)); + return (pud_t *)__va(p4d_page_paddr(p4d)); } /* Find an entry in the frst-level page table. */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index fd92792d148b..6e5c38756657 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -287,7 +287,7 @@ extern unsigned long VMALLOC_END; #define p4d_bad(p4d) (!ia64_phys_addr_valid(p4d_val(p4d))) #define p4d_present(p4d) (p4d_val(p4d) != 0UL) #define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) -#define p4d_page_vaddr(p4d) ((unsigned long) __va(p4d_val(p4d) & _PFN_MASK)) +#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & _PFN_MASK)) #define p4d_page(p4d) virt_to_page((p4d_val(p4d) + PAGE_OFFSET)) #endif diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index ab305453e90f..b865edff2670 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -210,9 +210,9 @@ static inline void p4d_clear(p4d_t *p4dp) p4d_val(*p4dp) = (unsigned long)invalid_pud_table; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return p4d_val(p4d); + return (pud_t *)p4d_val(p4d); } #define p4d_phys(p4d) virt_to_phys((void *)p4d_val(p4d)) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 5ebf6450f6da..2b4af824bdc5 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1030,7 +1030,10 @@ extern struct page *p4d_page(p4d_t p4d); /* Pointers in the page table tree are physical addresses */ #define __pgtable_ptr_val(ptr) __pa(ptr) -#define p4d_page_vaddr(p4d) __va(p4d_val(p4d) & ~P4D_MASKED_BITS) +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *)__va(p4d_val(p4d) & ~P4D_MASKED_BITS); +} static inline pmd_t *pud_pgtable(pud_t pud) { diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index fe2f4c9acd9e..10f5cf444d72 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -56,10 +56,14 @@ #define p4d_none(p4d) (!p4d_val(p4d)) #define p4d_bad(p4d) (p4d_val(p4d) == 0) #define p4d_present(p4d) (p4d_val(p4d) != 0) -#define p4d_page_vaddr(p4d) (p4d_val(p4d) & ~P4D_MASKED_BITS) #ifndef __ASSEMBLY__ +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + return (pud_t *) (p4d_val(p4d) & ~P4D_MASKED_BITS); +} + static inline void p4d_clear(p4d_t *p4dp) { *p4dp = __p4d(0); diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 605c770dd819..44239e0acf8e 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -898,7 +898,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) continue; } - pud_base = (pud_t *)p4d_page_vaddr(*p4d); + pud_base = p4d_pgtable(*p4d); remove_pud_table(pud_base, addr, next); free_pud_table(pud_base, p4d); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index bd0d903196d9..175aabf101e8 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -106,7 +106,7 @@ struct page *p4d_page(p4d_t p4d) VM_WARN_ON(!p4d_huge(p4d)); return pte_page(p4d_pte(p4d)); } - return virt_to_page(p4d_page_vaddr(p4d)); + return virt_to_page(p4d_pgtable(p4d)); } #endif diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index cac02ac301f1..5a1efd600f77 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -860,8 +860,8 @@ static inline pmd_t *pud_pgtable(pud_t pud) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) #define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define p4d_page_vaddr(p4d) \ - ((unsigned long) __va(p4d_val(p4d))) +#define p4d_pgtable(p4d) \ + ((pud_t *) __va(p4d_val(p4d))) #define p4d_present(p4d) (p4d_val(p4d) != 0U) #define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a90f6d02fb96..9bacde3ff514 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -906,9 +906,9 @@ static inline int p4d_present(p4d_t p4d) return p4d_flags(p4d) & _PAGE_PRESENT; } -static inline unsigned long p4d_page_vaddr(p4d_t p4d) +static inline pud_t *p4d_pgtable(p4d_t p4d) { - return (unsigned long)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); + return (pud_t *)__va(p4d_val(p4d) & p4d_pfn_mask(p4d)); } /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 20951ab522a1..acf4e50c5988 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -193,8 +193,8 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end) spin_lock(pgt_lock); if (!p4d_none(*p4d_ref) && !p4d_none(*p4d)) - BUG_ON(p4d_page_vaddr(*p4d) - != p4d_page_vaddr(*p4d_ref)); + BUG_ON(p4d_pgtable(*p4d) + != p4d_pgtable(*p4d_ref)); if (p4d_none(*p4d)) set_p4d(p4d, *p4d_ref); diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index ce2cbb3c380f..2f1d0aad645c 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h @@ -42,7 +42,7 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) #define __p4d(x) ((p4d_t) { __pgd(x) }) #define pgd_page(pgd) (p4d_page((p4d_t){ pgd })) -#define pgd_page_vaddr(pgd) (p4d_page_vaddr((p4d_t){ pgd })) +#define pgd_page_vaddr(pgd) ((unsigned long)(p4d_pgtable((p4d_t){ pgd }))) /* * allocating and freeing a p4d is trivial: the 1-entry p4d is diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 7cbd15f70bf5..eb70c6d7ceff 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -49,7 +49,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) #define __pud(x) ((pud_t) { __p4d(x) }) #define p4d_page(p4d) (pud_page((pud_t){ p4d })) -#define p4d_page_vaddr(p4d) (pud_pgtable((pud_t){ p4d })) +#define p4d_pgtable(p4d) ((pud_t *)(pud_pgtable((pud_t){ p4d }))) /* * allocating and freeing a pud is trivial: the 1-entry pud is diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index f8570799bc26..f924468d84ec 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -97,7 +97,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #ifndef pud_offset static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address); + return p4d_pgtable(*p4d) + pud_index(address); } #define pud_offset pud_offset #endif From 7afd0de0cc1452daf868fed16d3227d5cd0d77c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 16 Jun 2023 16:38:13 +0530 Subject: [PATCH 194/509] powerpc/book3s64/mm: Fix DirectMap stats in /proc/meminfo [ Upstream commit 0da90af431abc3f497a38ec9ef6e43b0d0dabe80 ] On memory unplug reduce DirectMap page count correctly. root@ubuntu-guest:# grep Direct /proc/meminfo DirectMap4k: 0 kB DirectMap64k: 0 kB DirectMap2M: 115343360 kB DirectMap1G: 0 kB Before fix: root@ubuntu-guest:# ndctl disable-namespace all disabled 1 namespace root@ubuntu-guest:# grep Direct /proc/meminfo DirectMap4k: 0 kB DirectMap64k: 0 kB DirectMap2M: 115343360 kB DirectMap1G: 0 kB After fix: root@ubuntu-guest:# ndctl disable-namespace all disabled 1 namespace root@ubuntu-guest:# grep Direct /proc/meminfo DirectMap4k: 0 kB DirectMap64k: 0 kB DirectMap2M: 104857600 kB DirectMap1G: 0 kB Fixes: a2dc009afa9a ("powerpc/mm/book3s/radix: Add mapping statistics") Signed-off-by: Aneesh Kumar K.V Tested-by: Sachin Sant > Signed-off-by: Michael Ellerman Link: https://msgid.link/20230616110826.344417-4-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/book3s64/radix_pgtable.c | 34 +++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 44239e0acf8e..3728c17de87e 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -783,9 +783,9 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d) } static void remove_pte_table(pte_t *pte_start, unsigned long addr, - unsigned long end) + unsigned long end, bool direct) { - unsigned long next; + unsigned long next, pages = 0; pte_t *pte; pte = pte_start + pte_index(addr); @@ -807,13 +807,16 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr, } pte_clear(&init_mm, addr, pte); + pages++; } + if (direct) + update_page_count(mmu_virtual_psize, -pages); } static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, - unsigned long end) + unsigned long end, bool direct) { - unsigned long next; + unsigned long next, pages = 0; pte_t *pte_base; pmd_t *pmd; @@ -831,19 +834,22 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, continue; } pte_clear(&init_mm, addr, (pte_t *)pmd); + pages++; continue; } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next); + remove_pte_table(pte_base, addr, next, direct); free_pte_table(pte_base, pmd); } + if (direct) + update_page_count(MMU_PAGE_2M, -pages); } static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, - unsigned long end) + unsigned long end, bool direct) { - unsigned long next; + unsigned long next, pages = 0; pmd_t *pmd_base; pud_t *pud; @@ -861,16 +867,20 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, continue; } pte_clear(&init_mm, addr, (pte_t *)pud); + pages++; continue; } pmd_base = pud_pgtable(*pud); - remove_pmd_table(pmd_base, addr, next); + remove_pmd_table(pmd_base, addr, next, direct); free_pmd_table(pmd_base, pud); } + if (direct) + update_page_count(MMU_PAGE_1G, -pages); } -static void __meminit remove_pagetable(unsigned long start, unsigned long end) +static void __meminit remove_pagetable(unsigned long start, unsigned long end, + bool direct) { unsigned long addr, next; pud_t *pud_base; @@ -899,7 +909,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) } pud_base = p4d_pgtable(*p4d); - remove_pud_table(pud_base, addr, next); + remove_pud_table(pud_base, addr, next, direct); free_pud_table(pud_base, p4d); } @@ -922,7 +932,7 @@ int __meminit radix__create_section_mapping(unsigned long start, int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end) { - remove_pagetable(start, end); + remove_pagetable(start, end, true); return 0; } #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -958,7 +968,7 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, #ifdef CONFIG_MEMORY_HOTPLUG void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) { - remove_pagetable(start, start + page_size); + remove_pagetable(start, start + page_size, false); } #endif #endif From aba192bb31df0da0eb5984132d0304d517708423 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 16 Jun 2023 16:38:15 +0530 Subject: [PATCH 195/509] powerpc/mm/dax: Fix the condition when checking if altmap vmemap can cross-boundary [ Upstream commit c8eebc4a99f15280654f23e914e746c40a516e50 ] Without this fix, the last subsection vmemmap can end up in memory even if the namespace is created with -M mem and has sufficient space in the altmap area. Fixes: cf387d9644d8 ("libnvdimm/altmap: Track namespace boundaries in altmap") Signed-off-by: Aneesh Kumar K.V Tested-by: Sachin Sant > Signed-off-by: Michael Ellerman Link: https://msgid.link/20230616110826.344417-6-aneesh.kumar@linux.ibm.com Signed-off-by: Sasha Levin --- arch/powerpc/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 386be136026e..b76cd49d521b 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -188,7 +188,7 @@ static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long star unsigned long nr_pfn = page_size / sizeof(struct page); unsigned long start_pfn = page_to_pfn((struct page *)start); - if ((start_pfn + nr_pfn) > altmap->end_pfn) + if ((start_pfn + nr_pfn - 1) > altmap->end_pfn) return true; if (start_pfn < altmap->base_pfn) From 5f23dae018c6b916e96ca398fa933750cca939e2 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 12:11:08 +0200 Subject: [PATCH 196/509] hwrng: virtio - add an internal buffer [ Upstream commit bf3175bc50a3754dc427e2f5046e17a9fafc8be7 ] hwrng core uses two buffers that can be mixed in the virtio-rng queue. If the buffer is provided with wait=0 it is enqueued in the virtio-rng queue but unused by the caller. On the next call, core provides another buffer but the first one is filled instead and the new one queued. And the caller reads the data from the new one that is not updated, and the data in the first one are lost. To avoid this mix, virtio-rng needs to use its own unique internal buffer at a cost of a data copy to the caller buffer. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20211028101111.128049-2-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Stable-dep-of: ac52578d6e8d ("hwrng: virtio - Fix race on data_avail and actual data") Signed-off-by: Sasha Levin --- drivers/char/hw_random/virtio-rng.c | 43 ++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a90001e02bf7..208c547dcac1 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -18,13 +18,20 @@ static DEFINE_IDA(rng_index_ida); struct virtrng_info { struct hwrng hwrng; struct virtqueue *vq; - struct completion have_data; char name[25]; - unsigned int data_avail; int index; bool busy; bool hwrng_register_done; bool hwrng_removed; + /* data transfer */ + struct completion have_data; + unsigned int data_avail; + /* minimal size returned by rng_buffer_size() */ +#if SMP_CACHE_BYTES < 32 + u8 data[32]; +#else + u8 data[SMP_CACHE_BYTES]; +#endif }; static void random_recv_done(struct virtqueue *vq) @@ -39,14 +46,14 @@ static void random_recv_done(struct virtqueue *vq) } /* The host will fill any buffer we give it with sweet, sweet randomness. */ -static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size) +static void register_buffer(struct virtrng_info *vi) { struct scatterlist sg; - sg_init_one(&sg, buf, size); + sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ - virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL); + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); virtqueue_kick(vi->vq); } @@ -55,6 +62,8 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; + unsigned int chunk; + size_t read; if (vi->hwrng_removed) return -ENODEV; @@ -62,19 +71,33 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (!vi->busy) { vi->busy = true; reinit_completion(&vi->have_data); - register_buffer(vi, buf, size); + register_buffer(vi); } if (!wait) return 0; - ret = wait_for_completion_killable(&vi->have_data); - if (ret < 0) - return ret; + read = 0; + while (size != 0) { + ret = wait_for_completion_killable(&vi->have_data); + if (ret < 0) + return ret; + + chunk = min_t(unsigned int, size, vi->data_avail); + memcpy(buf + read, vi->data, chunk); + read += chunk; + size -= chunk; + vi->data_avail = 0; + + if (size != 0) { + reinit_completion(&vi->have_data); + register_buffer(vi); + } + } vi->busy = false; - return vi->data_avail; + return read; } static void virtio_cleanup(struct hwrng *rng) From d13ea82bfe150fce7cbd5defc888d31d3a33fc91 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 12:11:09 +0200 Subject: [PATCH 197/509] hwrng: virtio - don't wait on cleanup [ Upstream commit 2bb31abdbe55742c89f4dc0cc26fcbc8467364f6 ] When virtio-rng device was dropped by the hwrng core we were forced to wait the buffer to come back from the device to not have remaining ongoing operation that could spoil the buffer. But now, as the buffer is internal to the virtio-rng we can release the waiting loop immediately, the buffer will be retrieve and use when the virtio-rng driver will be selected again. This avoids to hang on an rng_current write command if the virtio-rng device is blocked by a lack of entropy. This allows to select another entropy source if the current one is empty. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20211028101111.128049-3-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Stable-dep-of: ac52578d6e8d ("hwrng: virtio - Fix race on data_avail and actual data") Signed-off-by: Sasha Levin --- drivers/char/hw_random/virtio-rng.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 208c547dcac1..173aeea835bb 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -82,6 +82,11 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; + /* if vi->data_avail is 0, we have been interrupted + * by a cleanup, but buffer stays in the queue + */ + if (vi->data_avail == 0) + return read; chunk = min_t(unsigned int, size, vi->data_avail); memcpy(buf + read, vi->data, chunk); @@ -105,7 +110,7 @@ static void virtio_cleanup(struct hwrng *rng) struct virtrng_info *vi = (struct virtrng_info *)rng->priv; if (vi->busy) - wait_for_completion(&vi->have_data); + complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) From ffc5ce9c272fc39de3acae3f4a1124bd06b9f54b Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 12:11:10 +0200 Subject: [PATCH 198/509] hwrng: virtio - don't waste entropy [ Upstream commit 5c8e933050044d6dd2a000f9a5756ae73cbe7c44 ] if we don't use all the entropy available in the buffer, keep it and use it later. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20211028101111.128049-4-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Stable-dep-of: ac52578d6e8d ("hwrng: virtio - Fix race on data_avail and actual data") Signed-off-by: Sasha Levin --- drivers/char/hw_random/virtio-rng.c | 52 +++++++++++++++++++---------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 173aeea835bb..8ba97cf4ca8f 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -26,6 +26,7 @@ struct virtrng_info { /* data transfer */ struct completion have_data; unsigned int data_avail; + unsigned int data_idx; /* minimal size returned by rng_buffer_size() */ #if SMP_CACHE_BYTES < 32 u8 data[32]; @@ -42,6 +43,9 @@ static void random_recv_done(struct virtqueue *vq) if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) return; + vi->data_idx = 0; + vi->busy = false; + complete(&vi->have_data); } @@ -58,6 +62,16 @@ static void register_buffer(struct virtrng_info *vi) virtqueue_kick(vi->vq); } +static unsigned int copy_data(struct virtrng_info *vi, void *buf, + unsigned int size) +{ + size = min_t(unsigned int, size, vi->data_avail); + memcpy(buf, vi->data + vi->data_idx, size); + vi->data_idx += size; + vi->data_avail -= size; + return size; +} + static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) { int ret; @@ -68,17 +82,29 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (vi->hwrng_removed) return -ENODEV; - if (!vi->busy) { - vi->busy = true; - reinit_completion(&vi->have_data); - register_buffer(vi); + read = 0; + + /* copy available data */ + if (vi->data_avail) { + chunk = copy_data(vi, buf, size); + size -= chunk; + read += chunk; } if (!wait) - return 0; + return read; - read = 0; + /* We have already copied available entropy, + * so either size is 0 or data_avail is 0 + */ while (size != 0) { + /* data_avail is 0 */ + if (!vi->busy) { + /* no pending request, ask for more */ + vi->busy = true; + reinit_completion(&vi->have_data); + register_buffer(vi); + } ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; @@ -88,20 +114,11 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) if (vi->data_avail == 0) return read; - chunk = min_t(unsigned int, size, vi->data_avail); - memcpy(buf + read, vi->data, chunk); - read += chunk; + chunk = copy_data(vi, buf + read, size); size -= chunk; - vi->data_avail = 0; - - if (size != 0) { - reinit_completion(&vi->have_data); - register_buffer(vi); - } + read += chunk; } - vi->busy = false; - return read; } @@ -161,6 +178,7 @@ static void remove_common(struct virtio_device *vdev) vi->hwrng_removed = true; vi->data_avail = 0; + vi->data_idx = 0; complete(&vi->have_data); vdev->config->reset(vdev); vi->busy = false; From e8f51401d642a8070851f1c7d9f415fe1cf8f3fc Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 12:11:11 +0200 Subject: [PATCH 199/509] hwrng: virtio - always add a pending request [ Upstream commit 9a4b612d675b03f7fc9fa1957ca399c8223f3954 ] If we ensure we have already some data available by enqueuing again the buffer once data are exhausted, we can return what we have without waiting for the device answer. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20211028101111.128049-5-lvivier@redhat.com Signed-off-by: Michael S. Tsirkin Stable-dep-of: ac52578d6e8d ("hwrng: virtio - Fix race on data_avail and actual data") Signed-off-by: Sasha Levin --- drivers/char/hw_random/virtio-rng.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 8ba97cf4ca8f..0a7dde135db1 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -20,7 +20,6 @@ struct virtrng_info { struct virtqueue *vq; char name[25]; int index; - bool busy; bool hwrng_register_done; bool hwrng_removed; /* data transfer */ @@ -44,16 +43,18 @@ static void random_recv_done(struct virtqueue *vq) return; vi->data_idx = 0; - vi->busy = false; complete(&vi->have_data); } -/* The host will fill any buffer we give it with sweet, sweet randomness. */ -static void register_buffer(struct virtrng_info *vi) +static void request_entropy(struct virtrng_info *vi) { struct scatterlist sg; + reinit_completion(&vi->have_data); + vi->data_avail = 0; + vi->data_idx = 0; + sg_init_one(&sg, vi->data, sizeof(vi->data)); /* There should always be room for one buffer. */ @@ -69,6 +70,8 @@ static unsigned int copy_data(struct virtrng_info *vi, void *buf, memcpy(buf, vi->data + vi->data_idx, size); vi->data_idx += size; vi->data_avail -= size; + if (vi->data_avail == 0) + request_entropy(vi); return size; } @@ -98,13 +101,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) * so either size is 0 or data_avail is 0 */ while (size != 0) { - /* data_avail is 0 */ - if (!vi->busy) { - /* no pending request, ask for more */ - vi->busy = true; - reinit_completion(&vi->have_data); - register_buffer(vi); - } + /* data_avail is 0 but a request is pending */ ret = wait_for_completion_killable(&vi->have_data); if (ret < 0) return ret; @@ -126,8 +123,7 @@ static void virtio_cleanup(struct hwrng *rng) { struct virtrng_info *vi = (struct virtrng_info *)rng->priv; - if (vi->busy) - complete(&vi->have_data); + complete(&vi->have_data); } static int probe_common(struct virtio_device *vdev) @@ -163,6 +159,9 @@ static int probe_common(struct virtio_device *vdev) goto err_find; } + /* we always have a pending entropy request */ + request_entropy(vi); + return 0; err_find: @@ -181,7 +180,6 @@ static void remove_common(struct virtio_device *vdev) vi->data_idx = 0; complete(&vi->have_data); vdev->config->reset(vdev); - vi->busy = false; if (vi->hwrng_register_done) hwrng_unregister(&vi->hwrng); vdev->config->del_vqs(vdev); From 77471e4912d3960dafe141e268c44be8024fe4dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 May 2023 11:59:32 +0800 Subject: [PATCH 200/509] hwrng: virtio - Fix race on data_avail and actual data [ Upstream commit ac52578d6e8d300dd50f790f29a24169b1edd26c ] The virtio rng device kicks off a new entropy request whenever the data available reaches zero. When a new request occurs at the end of a read operation, that is, when the result of that request is only needed by the next reader, then there is a race between the writing of the new data and the next reader. This is because there is no synchronisation whatsoever between the writer and the reader. Fix this by writing data_avail with smp_store_release and reading it with smp_load_acquire when we first enter read. The subsequent reads are safe because they're either protected by the first load acquire, or by the completion mechanism. Also remove the redundant zeroing of data_idx in random_recv_done (data_idx must already be zero at this point) and data_avail in request_entropy (ditto). Reported-by: syzbot+726dc8c62c3536431ceb@syzkaller.appspotmail.com Fixes: f7f510ec1957 ("virtio: An entropy device, as suggested by hpa.") Signed-off-by: Herbert Xu Acked-by: Michael S. Tsirkin Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/char/hw_random/virtio-rng.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index 0a7dde135db1..3a194eb3ce8a 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -4,6 +4,7 @@ * Copyright (C) 2007, 2008 Rusty Russell IBM Corporation */ +#include #include #include #include @@ -37,13 +38,13 @@ struct virtrng_info { static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; + unsigned int len; /* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */ - if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) + if (!virtqueue_get_buf(vi->vq, &len)) return; - vi->data_idx = 0; - + smp_store_release(&vi->data_avail, len); complete(&vi->have_data); } @@ -52,7 +53,6 @@ static void request_entropy(struct virtrng_info *vi) struct scatterlist sg; reinit_completion(&vi->have_data); - vi->data_avail = 0; vi->data_idx = 0; sg_init_one(&sg, vi->data, sizeof(vi->data)); @@ -88,7 +88,7 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) read = 0; /* copy available data */ - if (vi->data_avail) { + if (smp_load_acquire(&vi->data_avail)) { chunk = copy_data(vi, buf, size); size -= chunk; read += chunk; From 31195ee328e98d8347124b989437f36ab06c222f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 19 May 2023 15:33:34 -0700 Subject: [PATCH 201/509] crypto: nx - fix build warnings when DEBUG_FS is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b04b076fb56560b39d695ac3744db457e12278fd ] Fix build warnings when DEBUG_FS is not enabled by using an empty do-while loop instead of a value: In file included from ../drivers/crypto/nx/nx.c:27: ../drivers/crypto/nx/nx.c: In function 'nx_register_algs': ../drivers/crypto/nx/nx.h:173:33: warning: statement with no effect [-Wunused-value] 173 | #define NX_DEBUGFS_INIT(drv) (0) ../drivers/crypto/nx/nx.c:573:9: note: in expansion of macro 'NX_DEBUGFS_INIT' 573 | NX_DEBUGFS_INIT(&nx_driver); ../drivers/crypto/nx/nx.c: In function 'nx_remove': ../drivers/crypto/nx/nx.h:174:33: warning: statement with no effect [-Wunused-value] 174 | #define NX_DEBUGFS_FINI(drv) (0) ../drivers/crypto/nx/nx.c:793:17: note: in expansion of macro 'NX_DEBUGFS_FINI' 793 | NX_DEBUGFS_FINI(&nx_driver); Also, there is no need to build nx_debugfs.o when DEBUG_FS is not enabled, so change the Makefile to accommodate that. Fixes: ae0222b7289d ("powerpc/crypto: nx driver code supporting nx encryption") Fixes: aef7b31c8833 ("powerpc/crypto: Build files for the nx device driver") Signed-off-by: Randy Dunlap Cc: Breno LeitĆ£o Cc: Nayna Jain Cc: Paulo Flabiano Smorigo Cc: Herbert Xu Cc: "David S. Miller" Cc: linux-crypto@vger.kernel.org Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/nx/Makefile | 2 +- drivers/crypto/nx/nx.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bc89a20e5d9d..351822a598f9 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CRYPTO_DEV_NX_ENCRYPT) += nx-crypto.o nx-crypto-objs := nx.o \ - nx_debugfs.o \ nx-aes-cbc.o \ nx-aes-ecb.o \ nx-aes-gcm.o \ @@ -11,6 +10,7 @@ nx-crypto-objs := nx.o \ nx-sha256.o \ nx-sha512.o +nx-crypto-$(CONFIG_DEBUG_FS) += nx_debugfs.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o nx-compress-objs := nx-842.o diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index c6233173c612..2697baebb6a3 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -170,8 +170,8 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *, unsigned int, void nx_debugfs_init(struct nx_crypto_driver *); void nx_debugfs_fini(struct nx_crypto_driver *); #else -#define NX_DEBUGFS_INIT(drv) (0) -#define NX_DEBUGFS_FINI(drv) (0) +#define NX_DEBUGFS_INIT(drv) do {} while (0) +#define NX_DEBUGFS_FINI(drv) do {} while (0) #endif #define NX_PAGE_NUM(x) ((u64)(x) & 0xfffffffffffff000ULL) From 88978ef7fdefc31e00a2c30ded46f06abc03def1 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 1 Jun 2023 21:09:55 +0900 Subject: [PATCH 202/509] modpost: fix section mismatch message for R_ARM_ABS32 [ Upstream commit b7c63520f6703a25eebb4f8138fed764fcae1c6f ] addend_arm_rel() processes R_ARM_ABS32 in a wrong way. Here, test code. [test code 1] #include int __initdata foo; int get_foo(void) { return foo; } If you compile it with ARM versatile_defconfig, modpost will show the symbol name, (unknown). WARNING: modpost: vmlinux.o: section mismatch in reference: get_foo (section: .text) -> (unknown) (section: .init.data) (You need to use GNU linker instead of LLD to reproduce it.) If you compile it for other architectures, modpost will show the correct symbol name. WARNING: modpost: vmlinux.o: section mismatch in reference: get_foo (section: .text) -> foo (section: .init.data) For R_ARM_ABS32, addend_arm_rel() sets r->r_addend to a wrong value. I just mimicked the code in arch/arm/kernel/module.c. However, there is more difficulty for ARM. Here, test code. [test code 2] #include int __initdata foo; int get_foo(void) { return foo; } int __initdata bar; int get_bar(void) { return bar; } With this commit applied, modpost will show the following messages for ARM versatile_defconfig: WARNING: modpost: vmlinux.o: section mismatch in reference: get_foo (section: .text) -> foo (section: .init.data) WARNING: modpost: vmlinux.o: section mismatch in reference: get_bar (section: .text) -> foo (section: .init.data) The reference from 'get_bar' to 'foo' seems wrong. I have no solution for this because it is true in assembly level. In the following output, relocation at 0x1c is no longer associated with 'bar'. The two relocation entries point to the same symbol, and the offset to 'bar' is encoded in the instruction 'r0, [r3, #4]'. Disassembly of section .text: 00000000 : 0: e59f3004 ldr r3, [pc, #4] @ c 4: e5930000 ldr r0, [r3] 8: e12fff1e bx lr c: 00000000 .word 0x00000000 00000010 : 10: e59f3004 ldr r3, [pc, #4] @ 1c 14: e5930004 ldr r0, [r3, #4] 18: e12fff1e bx lr 1c: 00000000 .word 0x00000000 Relocation section '.rel.text' at offset 0x244 contains 2 entries: Offset Info Type Sym.Value Sym. Name 0000000c 00000c02 R_ARM_ABS32 00000000 .init.data 0000001c 00000c02 R_ARM_ABS32 00000000 .init.data When find_elf_symbol() gets into a situation where relsym->st_name is zero, there is no guarantee to get the symbol name as written in C. I am keeping the current logic because it is useful in many architectures, but the symbol name is not always correct depending on the optimization. I left some comments in find_tosym(). Fixes: 56a974fa2d59 ("kbuild: make better section mismatch reports on arm") Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e48742760fec..9216eae798ff 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1313,6 +1313,10 @@ static Elf_Sym *find_elf_symbol(struct elf_info *elf, Elf64_Sword addr, if (relsym->st_name != 0) return relsym; + /* + * Strive to find a better symbol name, but the resulting name may not + * match the symbol referenced in the original code. + */ relsym_secindex = get_secindex(elf, relsym); for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { if (get_secindex(elf, sym) != relsym_secindex) @@ -1795,12 +1799,14 @@ static int addend_386_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) { unsigned int r_typ = ELF_R_TYPE(r->r_info); + Elf_Sym *sym = elf->symtab_start + ELF_R_SYM(r->r_info); + void *loc = reloc_location(elf, sechdr, r); + uint32_t inst; switch (r_typ) { case R_ARM_ABS32: - /* From ARM ABI: (S + A) | T */ - r->r_addend = (int)(long) - (elf->symtab_start + ELF_R_SYM(r->r_info)); + inst = TO_NATIVE(*(uint32_t *)loc); + r->r_addend = inst + sym->st_value; break; case R_ARM_PC24: case R_ARM_CALL: From b54069445591ba444e8cbbe00f1bce686e771873 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 1 Jun 2023 21:09:56 +0900 Subject: [PATCH 203/509] modpost: fix section mismatch message for R_ARM_{PC24,CALL,JUMP24} [ Upstream commit 56a24b8ce6a7f9c4a21b2276a8644f6f3d8fc14d ] addend_arm_rel() processes R_ARM_PC24, R_ARM_CALL, R_ARM_JUMP24 in a wrong way. Here, test code. [test code for R_ARM_JUMP24] .section .init.text,"ax" bar: bx lr .section .text,"ax" .globl foo foo: b bar [test code for R_ARM_CALL] .section .init.text,"ax" bar: bx lr .section .text,"ax" .globl foo foo: push {lr} bl bar pop {pc} If you compile it with ARM multi_v7_defconfig, modpost will show the symbol name, (unknown). WARNING: modpost: vmlinux.o: section mismatch in reference: foo (section: .text) -> (unknown) (section: .init.text) (You need to use GNU linker instead of LLD to reproduce it.) Fix the code to make modpost show the correct symbol name. I imported (with adjustment) sign_extend32() from include/linux/bitops.h. The '+8' is the compensation for pc-relative instruction. It is documented in "ELF for the Arm Architecture" [1]. "If the relocation is pc-relative then compensation for the PC bias (the PC value is 8 bytes ahead of the executing instruction in Arm state and 4 bytes in Thumb state) must be encoded in the relocation by the object producer." [1]: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst Fixes: 56a974fa2d59 ("kbuild: make better section mismatch reports on arm") Fixes: 6e2e340b59d2 ("ARM: 7324/1: modpost: Fix section warnings for ARM for many compilers") Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 9216eae798ff..fb7f75fa786b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1796,12 +1796,20 @@ static int addend_386_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) #define R_ARM_THM_JUMP19 51 #endif +static int32_t sign_extend32(int32_t value, int index) +{ + uint8_t shift = 31 - index; + + return (int32_t)(value << shift) >> shift; +} + static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) { unsigned int r_typ = ELF_R_TYPE(r->r_info); Elf_Sym *sym = elf->symtab_start + ELF_R_SYM(r->r_info); void *loc = reloc_location(elf, sechdr, r); uint32_t inst; + int32_t offset; switch (r_typ) { case R_ARM_ABS32: @@ -1811,6 +1819,10 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + inst = TO_NATIVE(*(uint32_t *)loc); + offset = sign_extend32((inst & 0x00ffffff) << 2, 25); + r->r_addend = offset + sym->st_value + 8; + break; case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: case R_ARM_THM_JUMP19: From f0350516b9d26578e0e520424a825325805c2772 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 May 2023 10:33:04 +0200 Subject: [PATCH 204/509] crypto: marvell/cesa - Fix type mismatch warning [ Upstream commit efbc7764c4446566edb76ca05e903b5905673d2e ] Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") uncovered a type mismatch in cesa 3des support that leads to a memcpy beyond the end of a structure: In function 'fortify_memcpy_chk', inlined from 'mv_cesa_des3_ede_setkey' at drivers/crypto/marvell/cesa/cipher.c:307:2: include/linux/fortify-string.h:583:25: error: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Werror=attribute-warning] 583 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is probably harmless as the actual data that is copied has the correct type, but clearly worth fixing nonetheless. Fixes: 4ada48397823 ("crypto: marvell/cesa - add Triple-DES support") Cc: Kees Cook Cc: Gustavo A. R. Silva Signed-off-by: Arnd Bergmann Reviewed-by: Kees Cook Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/marvell/cesa/cipher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index 596a8c74e40a..8dc10f998894 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -287,7 +287,7 @@ static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key, static int mv_cesa_des3_ede_setkey(struct crypto_skcipher *cipher, const u8 *key, unsigned int len) { - struct mv_cesa_des_ctx *ctx = crypto_skcipher_ctx(cipher); + struct mv_cesa_des3_ctx *ctx = crypto_skcipher_ctx(cipher); int err; err = verify_skcipher_des3_key(cipher, key); From cb0cdca5c979bc34c27602e2039562932c2591a4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 8 Jun 2023 11:23:40 +0300 Subject: [PATCH 205/509] modpost: fix off by one in is_executable_section() [ Upstream commit 3a3f1e573a105328a2cca45a7cfbebabbf5e3192 ] The > comparison should be >= to prevent an out of bounds array access. Fixes: 52dc0595d540 ("modpost: handle relocations mismatch in __ex_table.") Signed-off-by: Dan Carpenter Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index fb7f75fa786b..78ac98cfa02d 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1621,7 +1621,7 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf, static int is_executable_section(struct elf_info* elf, unsigned int section_index) { - if (section_index > elf->num_sections) + if (section_index >= elf->num_sections) fatal("section_index is outside elf->num_sections!\n"); return ((elf->sechdrs[section_index].sh_flags & SHF_EXECINSTR) == SHF_EXECINSTR); From bab0bf56779769cd0878edb8030b66b73c17f24a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 12 Jun 2023 00:50:50 +0900 Subject: [PATCH 206/509] ARC: define ASM_NL and __ALIGN(_STR) outside #ifdef __ASSEMBLY__ guard [ Upstream commit 92e2921eeafdfca9acd9b83f07d2b7ca099bac24 ] ASM_NL is useful not only in *.S files but also in .c files for using inline assembler in C code. On ARC, however, ASM_NL is evaluated inconsistently. It is expanded to a backquote (`) in *.S files, but a semicolon (;) in *.c files because arch/arc/include/asm/linkage.h defines it inside #ifdef __ASSEMBLY__, so the definition for C code falls back to the default value defined in include/linux/linkage.h. If ASM_NL is used in inline assembler in .c files, it will result in wrong assembly code because a semicolon is not an instruction separator, but the start of a comment for ARC. Move ASM_NL (also __ALIGN and __ALIGN_STR) out of the #ifdef. Fixes: 9df62f054406 ("arch: use ASM_NL instead of ';' for assembler new line character in the macro") Fixes: 8d92e992a785 ("ARC: define __ALIGN_STR and __ALIGN symbols for ARC") Signed-off-by: Masahiro Yamada Signed-off-by: Sasha Levin --- arch/arc/include/asm/linkage.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index c9434ff3aa4c..8a3fb71e9cfa 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -8,6 +8,10 @@ #include +#define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) + #ifdef __ASSEMBLY__ .macro ST2 e, o, off @@ -28,10 +32,6 @@ #endif .endm -#define ASM_NL ` /* use '`' to mark new line in macro */ -#define __ALIGN .align 4 -#define __ALIGN_STR __stringify(__ALIGN) - /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm #ifdef CONFIG_ARC_HAS_DCCM From 7c9f5a14d93b786568604994f4a221254c1862ea Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Sun, 18 Jun 2023 17:32:25 -0400 Subject: [PATCH 207/509] NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION [ Upstream commit c907e72f58ed979a24a9fdcadfbc447c51d5e509 ] When the client received NFS4ERR_BADSESSION, it schedules recovery and start the state manager thread which in turn freezes the session table and does not allow for any new requests to use the no-longer valid session. However, it is possible that before the state manager thread runs, a new operation would use the released slot that received BADSESSION and was therefore not updated its sequence number. Such re-use of the slot can lead the application errors. Fixes: 5c441544f045 ("NFSv4.x: Handle bad/dead sessions correctly in nfs41_sequence_process()") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index bca5d1bdd79b..b9567cc8698e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -926,6 +926,7 @@ static int nfs41_sequence_process(struct rpc_task *task, out_noaction: return ret; session_recover: + set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state); nfs4_schedule_session_recovery(session, status); dprintk("%s ERROR: %d Reset session\n", __func__, status); nfs41_sequence_free_slot(res); From 94a85474f5e3e518bdbf8c9f51cb343d734a04f7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:13:54 -0700 Subject: [PATCH 208/509] dax: Fix dax_mapping_release() use after free [ Upstream commit 6d24b170a9db0456f577b1ab01226a2254c016a8 ] A CONFIG_DEBUG_KOBJECT_RELEASE test of removing a device-dax region provider (like modprobe -r dax_hmem) yields: kobject: 'mapping0' (ffff93eb460e8800): kobject_release, parent 0000000000000000 (delayed 2000) [..] DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 23 PID: 282 at kernel/locking/lockdep.c:232 __lock_acquire+0x9fc/0x2260 [..] RIP: 0010:__lock_acquire+0x9fc/0x2260 [..] Call Trace: [..] lock_acquire+0xd4/0x2c0 ? ida_free+0x62/0x130 _raw_spin_lock_irqsave+0x47/0x70 ? ida_free+0x62/0x130 ida_free+0x62/0x130 dax_mapping_release+0x1f/0x30 device_release+0x36/0x90 kobject_delayed_cleanup+0x46/0x150 Due to attempting ida_free() on an ida object that has already been freed. Devices typically only hold a reference on their parent while registered. If a child needs a parent object to complete its release it needs to hold a reference that it drops from its release callback. Arrange for a dax_mapping to pin its parent dev_dax instance until dax_mapping_release(). Fixes: 0b07ce872a9e ("device-dax: introduce 'mapping' devices") Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577283412.1672036.16111545266174261446.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Dave Jiang Reviewed-by: Fan Ni Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma Signed-off-by: Sasha Levin --- drivers/dax/bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index a02777c93c07..48b7f0a64eb8 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -592,10 +592,12 @@ EXPORT_SYMBOL_GPL(alloc_dax_region); static void dax_mapping_release(struct device *dev) { struct dax_mapping *mapping = to_dax_mapping(dev); - struct dev_dax *dev_dax = to_dev_dax(dev->parent); + struct device *parent = dev->parent; + struct dev_dax *dev_dax = to_dev_dax(parent); ida_free(&dev_dax->ida, mapping->id); kfree(mapping); + put_device(parent); } static void unregister_dax_mapping(void *data) @@ -735,6 +737,7 @@ static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) dev = &mapping->dev; device_initialize(dev); dev->parent = &dev_dax->dev; + get_device(dev->parent); dev->type = &dax_mapping_type; dev_set_name(dev, "mapping%d", mapping->id); rc = device_add(dev); From 557e528255d5f5ea87b0db91969f317ebf4367f2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 2 Jun 2023 23:14:05 -0700 Subject: [PATCH 209/509] dax: Introduce alloc_dev_dax_id() [ Upstream commit 70aab281e18c68a1284bc387de127c2fc0bed3f8 ] The reference counting of dax_region objects is needlessly complicated, has lead to confusion [1], and has hidden a bug [2]. Towards cleaning up that mess introduce alloc_dev_dax_id() to minimize the holding of a dax_region reference to only what dev_dax_release() needs, the dax_region->ida. Part of the reason for the mess was the design to dereference a dax_region in all cases in free_dev_dax_id() even if the id was statically assigned by the upper level dax_region driver. Remove the need to call "is_static(dax_region)" by tracking whether the id is dynamic directly in the dev_dax instance itself. With that flag the dax_region pinning and release per dev_dax instance can move to alloc_dev_dax_id() and free_dev_dax_id() respectively. A follow-on cleanup address the unnecessary references in the dax_region setup and drivers. Fixes: 0f3da14a4f05 ("device-dax: introduce 'seed' devices") Link: http://lore.kernel.org/r/20221203095858.612027-1-liuyongqiang13@huawei.com [1] Link: http://lore.kernel.org/r/3cf0890b-4eb0-e70e-cd9c-2ecc3d496263@hpe.com [2] Reported-by: Yongqiang Liu Reported-by: Paul Cassella Reported-by: Ira Weiny Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/168577284563.1672036.13493034988900989554.stgit@dwillia2-xfh.jf.intel.com Reviewed-by: Ira Weiny Signed-off-by: Vishal Verma Signed-off-by: Sasha Levin --- drivers/dax/bus.c | 56 ++++++++++++++++++++++++--------------- drivers/dax/dax-private.h | 4 ++- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 48b7f0a64eb8..0541b7e4d5c6 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -403,18 +403,34 @@ static void unregister_dev_dax(void *dev) put_device(dev); } +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + /* a return value >= 0 indicates this invocation invalidated the id */ static int __free_dev_dax_id(struct dev_dax *dev_dax) { - struct dax_region *dax_region = dev_dax->region; struct device *dev = &dev_dax->dev; + struct dax_region *dax_region; int rc = dev_dax->id; device_lock_assert(dev); - if (is_static(dax_region) || dev_dax->id < 0) + if (!dev_dax->dyn_id || dev_dax->id < 0) return -1; + dax_region = dev_dax->region; ida_free(&dax_region->ida, dev_dax->id); + dax_region_put(dax_region); dev_dax->id = -1; return rc; } @@ -430,6 +446,20 @@ static int free_dev_dax_id(struct dev_dax *dev_dax) return rc; } +static int alloc_dev_dax_id(struct dev_dax *dev_dax) +{ + struct dax_region *dax_region = dev_dax->region; + int id; + + id = ida_alloc(&dax_region->ida, GFP_KERNEL); + if (id < 0) + return id; + kref_get(&dax_region->kref); + dev_dax->dyn_id = true; + dev_dax->id = id; + return id; +} + static ssize_t delete_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -517,20 +547,6 @@ static const struct attribute_group *dax_region_attribute_groups[] = { NULL, }; -static void dax_region_free(struct kref *kref) -{ - struct dax_region *dax_region; - - dax_region = container_of(kref, struct dax_region, kref); - kfree(dax_region); -} - -void dax_region_put(struct dax_region *dax_region) -{ - kref_put(&dax_region->kref, dax_region_free); -} -EXPORT_SYMBOL_GPL(dax_region_put); - static void dax_region_unregister(void *region) { struct dax_region *dax_region = region; @@ -1270,12 +1286,10 @@ static const struct attribute_group *dax_attribute_groups[] = { static void dev_dax_release(struct device *dev) { struct dev_dax *dev_dax = to_dev_dax(dev); - struct dax_region *dax_region = dev_dax->region; struct dax_device *dax_dev = dev_dax->dax_dev; put_dax(dax_dev); free_dev_dax_id(dev_dax); - dax_region_put(dax_region); kfree(dev_dax->pgmap); kfree(dev_dax); } @@ -1299,6 +1313,7 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) if (!dev_dax) return ERR_PTR(-ENOMEM); + dev_dax->region = dax_region; if (is_static(dax_region)) { if (dev_WARN_ONCE(parent, data->id < 0, "dynamic id specified to static region\n")) { @@ -1314,13 +1329,11 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) goto err_id; } - rc = ida_alloc(&dax_region->ida, GFP_KERNEL); + rc = alloc_dev_dax_id(dev_dax); if (rc < 0) goto err_id; - dev_dax->id = rc; } - dev_dax->region = dax_region; dev = &dev_dax->dev; device_initialize(dev); dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); @@ -1358,7 +1371,6 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) dev_dax->target_node = dax_region->target_node; dev_dax->align = dax_region->align; ida_init(&dev_dax->ida); - kref_get(&dax_region->kref); inode = dax_inode(dax_dev); dev->devt = inode->i_rdev; diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index 1c974b7caae6..afcada6fd2ed 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -52,7 +52,8 @@ struct dax_mapping { * @region - parent region * @dax_dev - core dax functionality * @target_node: effective numa node if dev_dax memory range is onlined - * @id: ida allocated id + * @dyn_id: is this a dynamic or statically created instance + * @id: ida allocated id when the dax_region is not static * @ida: mapping id allocator * @dev - device core * @pgmap - pgmap for memmap setup / lifetime (driver owned) @@ -64,6 +65,7 @@ struct dev_dax { struct dax_device *dax_dev; unsigned int align; int target_node; + bool dyn_id; int id; struct ida ida; struct device dev; From cd5837564ff59b34fefe1d07af3442089bcfeb23 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 16 Jun 2023 09:58:13 +0100 Subject: [PATCH 210/509] hwrng: st - keep clock enabled while hwrng is registered [ Upstream commit 501e197a02d4aef157f53ba3a0b9049c3e52fedc ] The st-rng driver uses devres to register itself with the hwrng core, the driver will be unregistered from hwrng when its device goes out of scope. This happens after the driver's remove function is called. However, st-rng's clock is disabled in the remove function. There's a short timeframe where st-rng is still registered with the hwrng core although its clock is disabled. I suppose the clock must be active to access the hardware and serve requests from the hwrng core. Switch to devm_clk_get_enabled and let devres disable the clock and unregister the hwrng. This avoids the race condition. Fixes: 3e75241be808 ("hwrng: drivers - Use device-managed registration API") Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/char/hw_random/st-rng.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c index 15ba1e6fae4d..6e9dfac9fc9f 100644 --- a/drivers/char/hw_random/st-rng.c +++ b/drivers/char/hw_random/st-rng.c @@ -42,7 +42,6 @@ struct st_rng_data { void __iomem *base; - struct clk *clk; struct hwrng ops; }; @@ -85,26 +84,18 @@ static int st_rng_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); - clk = devm_clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) return PTR_ERR(clk); - ret = clk_prepare_enable(clk); - if (ret) - return ret; - ddata->ops.priv = (unsigned long)ddata; ddata->ops.read = st_rng_read; ddata->ops.name = pdev->name; ddata->base = base; - ddata->clk = clk; - - dev_set_drvdata(&pdev->dev, ddata); ret = devm_hwrng_register(&pdev->dev, &ddata->ops); if (ret) { dev_err(&pdev->dev, "Failed to register HW RNG\n"); - clk_disable_unprepare(clk); return ret; } @@ -113,15 +104,6 @@ static int st_rng_probe(struct platform_device *pdev) return 0; } -static int st_rng_remove(struct platform_device *pdev) -{ - struct st_rng_data *ddata = dev_get_drvdata(&pdev->dev); - - clk_disable_unprepare(ddata->clk); - - return 0; -} - static const struct of_device_id st_rng_match[] __maybe_unused = { { .compatible = "st,rng" }, {}, @@ -134,7 +116,6 @@ static struct platform_driver st_rng_driver = { .of_match_table = of_match_ptr(st_rng_match), }, .probe = st_rng_probe, - .remove = st_rng_remove }; module_platform_driver(st_rng_driver); From 810e401b34c4c4c244d8b93b9947ea5b3d4d49f8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Jul 2023 09:35:30 -0600 Subject: [PATCH 211/509] io_uring: ensure IOPOLL locks around deferred work No direct upstream commit exists for this issue. It was fixed in 5.18 as part of a larger rework of the completion side. io_commit_cqring() writes the CQ ring tail to make it visible, but it also kicks off any deferred work we have. A ring setup with IOPOLL does not need any locking around the CQ ring updates, as we're always under the ctx uring_lock. But if we have deferred work that needs processing, then io_queue_deferred() assumes that the completion_lock is held, as it is for !IOPOLL. Add a lockdep assertion to check and document this fact, and have io_iopoll_complete() check if we have deferred work and run that separately with the appropriate lock grabbed. Cc: stable@vger.kernel.org # 5.10, 5.15 Reported-by: dghost david Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index f169b4645769..bce80ed27c4d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1521,6 +1521,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) static void io_queue_deferred(struct io_ring_ctx *ctx) { + lockdep_assert_held(&ctx->completion_lock); + while (!list_empty(&ctx->defer_list)) { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); @@ -1572,14 +1574,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) io_queue_deferred(ctx); } -static inline void io_commit_cqring(struct io_ring_ctx *ctx) +static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx) +{ + return ctx->off_timeout_used || ctx->drain_active; +} + +static inline void __io_commit_cqring(struct io_ring_ctx *ctx) { - if (unlikely(ctx->off_timeout_used || ctx->drain_active)) - __io_commit_cqring_flush(ctx); /* order cqe stores with ring update */ smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); } +static inline void io_commit_cqring(struct io_ring_ctx *ctx) +{ + if (unlikely(io_commit_needs_flush(ctx))) + __io_commit_cqring_flush(ctx); + __io_commit_cqring(ctx); +} + static inline bool io_sqring_full(struct io_ring_ctx *ctx) { struct io_rings *r = ctx->rings; @@ -2518,7 +2530,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, io_req_free_batch(&rb, req, &ctx->submit_state); } - io_commit_cqring(ctx); + if (io_commit_needs_flush(ctx)) { + spin_lock(&ctx->completion_lock); + __io_commit_cqring_flush(ctx); + spin_unlock(&ctx->completion_lock); + } + __io_commit_cqring(ctx); io_cqring_ev_posted_iopoll(ctx); io_req_free_batch_finish(ctx, &rb); } From 8b0a55b59244163d70cf840e649e9fa4b3d98bb6 Mon Sep 17 00:00:00 2001 From: Davide Tronchin Date: Thu, 22 Jun 2023 11:29:21 +0200 Subject: [PATCH 212/509] USB: serial: option: add LARA-R6 01B PIDs commit ffa5f7a3bf28c1306eef85d4056539c2d4b8eb09 upstream. The new LARA-R6 product variant identified by the "01B" string can be configured (by AT interface) in three different USB modes: * Default mode (Vendor ID: 0x1546 Product ID: 0x1311) with 4 serial interfaces * RmNet mode (Vendor ID: 0x1546 Product ID: 0x1312) with 4 serial interfaces and 1 RmNet virtual network interface * CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1313) with 4 serial interface and 1 CDC-ECM virtual network interface The first 4 interfaces of all the 3 USB configurations (default, RmNet, CDC-ECM) are the same. In default mode LARA-R6 01B exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions In RmNet mode LARA-R6 01B exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions If 4: RMNET interface In CDC-ECM mode LARA-R6 01B exposes the following interfaces: If 0: Diagnostic If 1: AT parser If 2: AT parser If 3: AT parser/alternative functions If 4: CDC-ECM interface Signed-off-by: Davide Tronchin Link: https://lore.kernel.org/r/20230622092921.12651-1-davide.tronchin.94@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 243f97a30779..625d9dc776be 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1151,6 +1151,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x90fa), .driver_info = RSVD(3) }, /* u-blox products */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1311) }, /* u-blox LARA-R6 01B */ + { USB_DEVICE(UBLOX_VENDOR_ID, 0x1312), /* u-blox LARA-R6 01B (RMNET) */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(UBLOX_VENDOR_ID, 0x1313, 0xff) }, /* u-blox LARA-R6 01B (ECM) */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1341) }, /* u-blox LARA-L6 */ { USB_DEVICE(UBLOX_VENDOR_ID, 0x1342), /* u-blox LARA-L6 (RMNET) */ .driver_info = RSVD(4) }, From fac7be49f1e62ea04edb00e95a9c4e3102d08964 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Sun, 18 Jun 2023 17:39:49 +0530 Subject: [PATCH 213/509] usb: dwc3: gadget: Propagate core init errors to UDC during pullup commit c0aabed9cabe057309779a9e26fe86a113d24dad upstream. In scenarios where pullup relies on resume (get sync) to initialize the controller and set the run stop bit, then core_init is followed by gadget_resume which will eventually set run stop bit. But in cases where the core_init fails, the return value is not sent back to udc appropriately. So according to UDC the controller has started but in reality we never set the run stop bit. On systems like Android, there are uevents sent to HAL depending on whether the configfs_bind / configfs_disconnect were invoked. In the above mentioned scnenario, if the core init fails, the run stop won't be set and the cable plug-out won't result in generation of any disconnect event and userspace would never get any uevent regarding cable plug out and we never call pullup(0) again. Furthermore none of the next Plug-In/Plug-Out's would be known to configfs. Return back the appropriate result to UDC to let the userspace/ configfs know that the pullup failed so they can take appropriate action. Fixes: 77adb8bdf422 ("usb: dwc3: gadget: Allow runtime suspend if UDC unbinded") Cc: stable Signed-off-by: Krishna Kurapati Acked-by: Thinh Nguyen Message-ID: <20230618120949.14868-1-quic_kriskura@quicinc.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 164910237669..221738b64426 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2215,7 +2215,9 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) ret = pm_runtime_get_sync(dwc->dev); if (!ret || ret < 0) { pm_runtime_put(dwc->dev); - return 0; + if (ret < 0) + pm_runtime_set_suspended(dwc->dev); + return ret; } if (dwc->pullups_connected == is_on) { From b6a107c52073496d2e5d2837915f59fb3103832f Mon Sep 17 00:00:00 2001 From: EJ Hsu Date: Fri, 9 Jun 2023 14:29:32 +0800 Subject: [PATCH 214/509] phy: tegra: xusb: Clear the driver reference in usb-phy dev commit c0c2fcb1325d0d4f3b322b5ee49385f8eca2560d upstream. For the dual-role port, it will assign the phy dev to usb-phy dev and use the port dev driver as the dev driver of usb-phy. When we try to destroy the port dev, it will destroy its dev driver as well. But we did not remove the reference from usb-phy dev. This might cause the use-after-free issue in KASAN. Fixes: e8f7d2f409a1 ("phy: tegra: xusb: Add usb-phy support") Cc: stable@vger.kernel.org Signed-off-by: EJ Hsu Signed-off-by: Haotien Hsu Acked-by: Thierry Reding Acked-by: Jon Hunter Link: https://lore.kernel.org/r/20230609062932.3276509-1-haotienh@nvidia.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/phy/tegra/xusb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index d07f33ec7939..f93be3c4a4a6 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -556,6 +556,7 @@ static void tegra_xusb_port_unregister(struct tegra_xusb_port *port) usb_role_switch_unregister(port->usb_role_sw); cancel_work_sync(&port->usb_phy_work); usb_remove_phy(&port->usb_phy); + port->usb_phy.dev->driver = NULL; } if (port->ops->remove) From 54da6c4c143fedc9dec675503a806e65eb85b055 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 21 Jun 2023 08:17:23 +1200 Subject: [PATCH 215/509] block: fix signed int overflow in Amiga partition support commit fc3d092c6bb48d5865fec15ed5b333c12f36288c upstream. The Amiga partition parser module uses signed int for partition sector address and count, which will overflow for disks larger than 1 TB. Use sector_t as type for sector address and size to allow using disks up to 2 TB without LBD support, and disks larger than 2 TB with LBD. This bug was reported originally in 2012, and the fix was created by the RDB author, Joanne Dow . A patch had been discussed and reviewed on linux-m68k at that time but never officially submitted. This patch differs from Joanne's patch only in its use of sector_t instead of unsigned int. No checking for overflows is done (see patch 3 of this series for that). Reported-by: Martin Steigerwald Closes: https://bugzilla.kernel.org/show_bug.cgi?id=43511 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Message-ID: <201206192146.09327.Martin@lichtvoll.de> Cc: # 5.2 Signed-off-by: Michael Schmitz Tested-by: Martin Steigerwald Reviewed-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230620201725.7020-2-schmitzmic@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/partitions/amiga.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 9526491d9aed..64f0844468aa 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -31,7 +31,8 @@ int amiga_partition(struct parsed_partitions *state) unsigned char *data; struct RigidDiskBlock *rdb; struct PartitionBlock *pb; - int start_sect, nr_sects, blk, part, res = 0; + sector_t start_sect, nr_sects; + int blk, part, res = 0; int blksize = 1; /* Multiplier for disk block size */ int slot = 1; char b[BDEVNAME_SIZE]; @@ -97,14 +98,14 @@ int amiga_partition(struct parsed_partitions *state) /* Tell Kernel about it */ - nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 - - be32_to_cpu(pb->pb_Environment[9])) * + nr_sects = ((sector_t)be32_to_cpu(pb->pb_Environment[10]) + 1 - + be32_to_cpu(pb->pb_Environment[9])) * be32_to_cpu(pb->pb_Environment[3]) * be32_to_cpu(pb->pb_Environment[5]) * blksize; if (!nr_sects) continue; - start_sect = be32_to_cpu(pb->pb_Environment[9]) * + start_sect = (sector_t)be32_to_cpu(pb->pb_Environment[9]) * be32_to_cpu(pb->pb_Environment[3]) * be32_to_cpu(pb->pb_Environment[5]) * blksize; From e4a9b3333e67a65d8a70a4bb1c2a20f473ee5eb8 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 21 Jun 2023 08:17:24 +1200 Subject: [PATCH 216/509] block: change all __u32 annotations to __be32 in affs_hardblocks.h commit 95a55437dc49fb3342c82e61f5472a71c63d9ed0 upstream. The Amiga partition parser module uses signed int for partition sector address and count, which will overflow for disks larger than 1 TB. Use u64 as type for sector address and size to allow using disks up to 2 TB without LBD support, and disks larger than 2 TB with LBD. The RBD format allows to specify disk sizes up to 2^128 bytes (though native OS limitations reduce this somewhat, to max 2^68 bytes), so check for u64 overflow carefully to protect against overflowing sector_t. This bug was reported originally in 2012, and the fix was created by the RDB author, Joanne Dow . A patch had been discussed and reviewed on linux-m68k at that time but never officially submitted (now resubmitted as patch 1 of this series). Patch 3 (this series) adds additional error checking and warning messages. One of the error checks now makes use of the previously unused rdb_CylBlocks field, which causes a 'sparse' warning (cast to restricted __be32). Annotate all 32 bit fields in affs_hardblocks.h as __be32, as the on-disk format of RDB and partition blocks is always big endian. Reported-by: Martin Steigerwald Closes: https://bugzilla.kernel.org/show_bug.cgi?id=43511 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Message-ID: <201206192146.09327.Martin@lichtvoll.de> Cc: # 5.2 Signed-off-by: Michael Schmitz Reviewed-by: Christoph Hellwig Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230620201725.7020-3-schmitzmic@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/affs_hardblocks.h | 68 ++++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/include/uapi/linux/affs_hardblocks.h b/include/uapi/linux/affs_hardblocks.h index 5e2fb8481252..a5aff2eb5f70 100644 --- a/include/uapi/linux/affs_hardblocks.h +++ b/include/uapi/linux/affs_hardblocks.h @@ -7,42 +7,42 @@ /* Just the needed definitions for the RDB of an Amiga HD. */ struct RigidDiskBlock { - __u32 rdb_ID; + __be32 rdb_ID; __be32 rdb_SummedLongs; - __s32 rdb_ChkSum; - __u32 rdb_HostID; + __be32 rdb_ChkSum; + __be32 rdb_HostID; __be32 rdb_BlockBytes; - __u32 rdb_Flags; - __u32 rdb_BadBlockList; + __be32 rdb_Flags; + __be32 rdb_BadBlockList; __be32 rdb_PartitionList; - __u32 rdb_FileSysHeaderList; - __u32 rdb_DriveInit; - __u32 rdb_Reserved1[6]; - __u32 rdb_Cylinders; - __u32 rdb_Sectors; - __u32 rdb_Heads; - __u32 rdb_Interleave; - __u32 rdb_Park; - __u32 rdb_Reserved2[3]; - __u32 rdb_WritePreComp; - __u32 rdb_ReducedWrite; - __u32 rdb_StepRate; - __u32 rdb_Reserved3[5]; - __u32 rdb_RDBBlocksLo; - __u32 rdb_RDBBlocksHi; - __u32 rdb_LoCylinder; - __u32 rdb_HiCylinder; - __u32 rdb_CylBlocks; - __u32 rdb_AutoParkSeconds; - __u32 rdb_HighRDSKBlock; - __u32 rdb_Reserved4; + __be32 rdb_FileSysHeaderList; + __be32 rdb_DriveInit; + __be32 rdb_Reserved1[6]; + __be32 rdb_Cylinders; + __be32 rdb_Sectors; + __be32 rdb_Heads; + __be32 rdb_Interleave; + __be32 rdb_Park; + __be32 rdb_Reserved2[3]; + __be32 rdb_WritePreComp; + __be32 rdb_ReducedWrite; + __be32 rdb_StepRate; + __be32 rdb_Reserved3[5]; + __be32 rdb_RDBBlocksLo; + __be32 rdb_RDBBlocksHi; + __be32 rdb_LoCylinder; + __be32 rdb_HiCylinder; + __be32 rdb_CylBlocks; + __be32 rdb_AutoParkSeconds; + __be32 rdb_HighRDSKBlock; + __be32 rdb_Reserved4; char rdb_DiskVendor[8]; char rdb_DiskProduct[16]; char rdb_DiskRevision[4]; char rdb_ControllerVendor[8]; char rdb_ControllerProduct[16]; char rdb_ControllerRevision[4]; - __u32 rdb_Reserved5[10]; + __be32 rdb_Reserved5[10]; }; #define IDNAME_RIGIDDISK 0x5244534B /* "RDSK" */ @@ -50,16 +50,16 @@ struct RigidDiskBlock { struct PartitionBlock { __be32 pb_ID; __be32 pb_SummedLongs; - __s32 pb_ChkSum; - __u32 pb_HostID; + __be32 pb_ChkSum; + __be32 pb_HostID; __be32 pb_Next; - __u32 pb_Flags; - __u32 pb_Reserved1[2]; - __u32 pb_DevFlags; + __be32 pb_Flags; + __be32 pb_Reserved1[2]; + __be32 pb_DevFlags; __u8 pb_DriveName[32]; - __u32 pb_Reserved2[15]; + __be32 pb_Reserved2[15]; __be32 pb_Environment[17]; - __u32 pb_EReserved[15]; + __be32 pb_EReserved[15]; }; #define IDNAME_PARTITION 0x50415254 /* "PART" */ From cd5ec3ee52ce4b7e283cc11facfa420c297c8065 Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Mon, 15 May 2023 10:13:07 +0800 Subject: [PATCH 217/509] SUNRPC: Fix UAF in svc_tcp_listen_data_ready() commit fc80fc2d4e39137869da3150ee169b40bf879287 upstream. After the listener svc_sock is freed, and before invoking svc_tcp_accept() for the established child sock, there is a window that the newsock retaining a freed listener svc_sock in sk_user_data which cloning from parent. In the race window, if data is received on the newsock, we will observe use-after-free report in svc_tcp_listen_data_ready(). Reproduce by two tasks: 1. while :; do rpc.nfsd 0 ; rpc.nfsd; done 2. while :; do echo "" | ncat -4 127.0.0.1 2049 ; done KASAN report: ================================================================== BUG: KASAN: slab-use-after-free in svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc] Read of size 8 at addr ffff888139d96228 by task nc/102553 CPU: 7 PID: 102553 Comm: nc Not tainted 6.3.0+ #18 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 Call Trace: dump_stack_lvl+0x33/0x50 print_address_description.constprop.0+0x27/0x310 print_report+0x3e/0x70 kasan_report+0xae/0xe0 svc_tcp_listen_data_ready+0x1cf/0x1f0 [sunrpc] tcp_data_queue+0x9f4/0x20e0 tcp_rcv_established+0x666/0x1f60 tcp_v4_do_rcv+0x51c/0x850 tcp_v4_rcv+0x23fc/0x2e80 ip_protocol_deliver_rcu+0x62/0x300 ip_local_deliver_finish+0x267/0x350 ip_local_deliver+0x18b/0x2d0 ip_rcv+0x2fb/0x370 __netif_receive_skb_one_core+0x166/0x1b0 process_backlog+0x24c/0x5e0 __napi_poll+0xa2/0x500 net_rx_action+0x854/0xc90 __do_softirq+0x1bb/0x5de do_softirq+0xcb/0x100 ... Allocated by task 102371: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x7b/0x90 svc_setup_socket+0x52/0x4f0 [sunrpc] svc_addsock+0x20d/0x400 [sunrpc] __write_ports_addfd+0x209/0x390 [nfsd] write_ports+0x239/0x2c0 [nfsd] nfsctl_transaction_write+0xac/0x110 [nfsd] vfs_write+0x1c3/0xae0 ksys_write+0xed/0x1c0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 102551: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x50 __kasan_slab_free+0x106/0x190 __kmem_cache_free+0x133/0x270 svc_xprt_free+0x1e2/0x350 [sunrpc] svc_xprt_destroy_all+0x25a/0x440 [sunrpc] nfsd_put+0x125/0x240 [nfsd] nfsd_svc+0x2cb/0x3c0 [nfsd] write_threads+0x1ac/0x2a0 [nfsd] nfsctl_transaction_write+0xac/0x110 [nfsd] vfs_write+0x1c3/0xae0 ksys_write+0xed/0x1c0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc Fix the UAF by simply doing nothing in svc_tcp_listen_data_ready() if state != TCP_LISTEN, that will avoid dereferencing svsk for all child socket. Link: https://lore.kernel.org/lkml/20230507091131.23540-1-dinghui@sangfor.com.cn/ Fixes: fa9251afc33c ("SUNRPC: Call the default socket callbacks instead of open coding") Signed-off-by: Ding Hui Cc: Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/svcsock.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6d5bb8bfed38..3d5ee042c501 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -692,12 +692,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - if (svsk) { - /* Refer to svc_setup_socket() for details. */ - rmb(); - svsk->sk_odata(sk); - } - /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -706,13 +700,18 @@ static void svc_tcp_listen_data_ready(struct sock *sk) * when one of child sockets become ESTABLISHED. * 2) data_ready method of the child socket may be called * when it receives data before the socket is accepted. - * In case of 2, we should ignore it silently. + * In case of 2, we should ignore it silently and DO NOT + * dereference svsk. */ - if (sk->sk_state == TCP_LISTEN) { - if (svsk) { - set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); - svc_xprt_enqueue(&svsk->sk_xprt); - } + if (sk->sk_state != TCP_LISTEN) + return; + + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); + svsk->sk_odata(sk); + set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); + svc_xprt_enqueue(&svsk->sk_xprt); } } From a27aeae714cdf7249914a57e14627581cefc1336 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 27 Apr 2023 13:21:52 +0200 Subject: [PATCH 218/509] w1: w1_therm: fix locking behavior in convert_t [ Upstream commit dca5480ab7b77a889088ab7cac81934604510ac7 ] The commit 67b392f7b8ed ("w1_therm: optimizing temperature read timings") accidentially inverted the logic for lock handling of the bus mutex. Before: pullup -> release lock before sleep no pullup -> release lock after sleep After: pullup -> release lock after sleep no pullup -> release lock before sleep This cause spurious measurements of 85 degree (powerup value) on the Tarragon board with connected 1-w temperature sensor (w1_therm.w1_strong_pull=0). In the meantime a new feature for polling the conversion completion has been integrated in these branches with commit 021da53e65fd ("w1: w1_therm: Add sysfs entries to control conversion time and driver features"). But this feature isn't available for parasite power mode, so handle this separately. Link: https://lore.kernel.org/regressions/2023042645-attentive-amends-7b0b@gregkh/T/ Fixes: 67b392f7b8ed ("w1_therm: optimizing temperature read timings") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20230427112152.12313-1-stefan.wahren@i2se.com Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/w1/slaves/w1_therm.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 6546d029c7fd..3888643a22f6 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -1094,29 +1094,26 @@ static int convert_t(struct w1_slave *sl, struct therm_info *info) w1_write_8(dev_master, W1_CONVERT_TEMP); - if (strong_pullup) { /*some device need pullup */ + if (SLAVE_FEATURES(sl) & W1_THERM_POLL_COMPLETION) { + ret = w1_poll_completion(dev_master, W1_POLL_CONVERT_TEMP); + if (ret) { + dev_dbg(&sl->dev, "%s: Timeout\n", __func__); + goto mt_unlock; + } + mutex_unlock(&dev_master->bus_mutex); + } else if (!strong_pullup) { /*no device need pullup */ sleep_rem = msleep_interruptible(t_conv); if (sleep_rem != 0) { ret = -EINTR; goto mt_unlock; } mutex_unlock(&dev_master->bus_mutex); - } else { /*no device need pullup */ - if (SLAVE_FEATURES(sl) & W1_THERM_POLL_COMPLETION) { - ret = w1_poll_completion(dev_master, W1_POLL_CONVERT_TEMP); - if (ret) { - dev_dbg(&sl->dev, "%s: Timeout\n", __func__); - goto mt_unlock; - } - mutex_unlock(&dev_master->bus_mutex); - } else { - /* Fixed delay */ - mutex_unlock(&dev_master->bus_mutex); - sleep_rem = msleep_interruptible(t_conv); - if (sleep_rem != 0) { - ret = -EINTR; - goto dec_refcnt; - } + } else { /*some device need pullup */ + mutex_unlock(&dev_master->bus_mutex); + sleep_rem = msleep_interruptible(t_conv); + if (sleep_rem != 0) { + ret = -EINTR; + goto dec_refcnt; } } ret = read_scratchpad(sl, info); From a7890637b3b9b485ffd6a8bfc4276fab03b8e9f5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 May 2021 17:17:45 +0300 Subject: [PATCH 219/509] w1: fix loop in w1_fini() [ Upstream commit 83f3fcf96fcc7e5405b37d9424c7ef26bfa203f8 ] The __w1_remove_master_device() function calls: list_del(&dev->w1_master_entry); So presumably this can cause an endless loop. Fixes: 7785925dd8e0 ("[PATCH] w1: cleanups.") Signed-off-by: Dan Carpenter Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- drivers/w1/w1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 15842377c8d2..1c1a9438f4b6 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1228,10 +1228,10 @@ static int __init w1_init(void) static void __exit w1_fini(void) { - struct w1_master *dev; + struct w1_master *dev, *n; /* Set netlink removal messages and some cleanup */ - list_for_each_entry(dev, &w1_masters, w1_master_entry) + list_for_each_entry_safe(dev, n, &w1_masters, w1_master_entry) __w1_remove_master_device(dev); w1_fini_netlink(); From ed68e8e22ee1110934efbe2908640baa3056dbcb Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Wed, 3 May 2023 14:57:41 +0200 Subject: [PATCH 220/509] sh: j2: Use ioremap() to translate device tree address into kernel memory [ Upstream commit bc9d1f0cecd2407cfb2364a7d4be2f52d1d46a9d ] Addresses the following warning when building j2_defconfig: arch/sh/kernel/cpu/sh2/probe.c: In function 'scan_cache': arch/sh/kernel/cpu/sh2/probe.c:24:16: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 24 | j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node); | Fixes: 5a846abad07f ("sh: add support for J-Core J2 processor") Reviewed-by: Geert Uytterhoeven Tested-by: Rob Landley Signed-off-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230503125746.331835-1-glaubitz@physik.fu-berlin.de Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Sasha Levin --- arch/sh/kernel/cpu/sh2/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c index d342ea08843f..70a07f4f2142 100644 --- a/arch/sh/kernel/cpu/sh2/probe.c +++ b/arch/sh/kernel/cpu/sh2/probe.c @@ -21,7 +21,7 @@ static int __init scan_cache(unsigned long node, const char *uname, if (!of_flat_dt_is_compatible(node, "jcore,cache")) return 0; - j2_ccr_base = (u32 __iomem *)of_flat_dt_translate_address(node); + j2_ccr_base = ioremap(of_flat_dt_translate_address(node), 4); return 1; } From bb81ca33ace3b4089c1c4fe07b0c39a453cdcad7 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 8 May 2023 11:20:11 +0300 Subject: [PATCH 221/509] serial: 8250: omap: Fix freeing of resources on failed register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b9ab22c2bc8652324a803b3e2be69838920b4025 ] If serial8250_register_8250_port() fails, the SoC can hang as the deferred PMQoS work will still run as is not flushed and removed. Fixes: 61929cf0169d ("tty: serial: Add 8250-core based omap driver") Signed-off-by: Tony Lindgren Reviewed-by: Ilpo JƤrvinen Link: https://lore.kernel.org/r/20230508082014.23083-2-tony@atomide.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 483fff3a95c9..6b255e1633fd 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1469,7 +1469,9 @@ static int omap8250_probe(struct platform_device *pdev) err: pm_runtime_dont_use_autosuspend(&pdev->dev); pm_runtime_put_sync(&pdev->dev); + flush_work(&priv->qos_work); pm_runtime_disable(&pdev->dev); + cpu_latency_qos_remove_request(&priv->pm_qos_request); return ret; } From 32809afb6063197c45e522b62cf7dced2e3c20ed Mon Sep 17 00:00:00 2001 From: Mantas Pucka Date: Tue, 25 Apr 2023 12:11:49 +0300 Subject: [PATCH 222/509] clk: qcom: gcc-ipq6018: Use floor ops for sdcc clocks [ Upstream commit 56e5ae0116aef87273cf1812d608645b076e4f02 ] SDCC clocks must be rounded down to avoid overclocking the controller. Fixes: d9db07f088af ("clk: qcom: Add ipq6018 Global Clock Controller support") Signed-off-by: Mantas Pucka Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/1682413909-24927-1-git-send-email-mantas@8devices.com Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq6018.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 3f9c2f61a5d9..5c5d1b04ea7a 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -1654,7 +1654,7 @@ static struct clk_rcg2 sdcc1_apps_clk_src = { .name = "sdcc1_apps_clk_src", .parent_data = gcc_xo_gpll0_gpll2_gpll0_out_main_div2, .num_parents = 4, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; From 5f3f4aa673a03cd04030f2bec2c825e476082583 Mon Sep 17 00:00:00 2001 From: Daniil Dulov Date: Tue, 14 Mar 2023 10:04:49 -0700 Subject: [PATCH 223/509] media: usb: Check az6007_read() return value [ Upstream commit fdaca63186f59fc664b346c45b76576624b48e57 ] If az6007_read() returns error, there is no sence to continue. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 3af2f4f15a61 ("[media] az6007: Change the az6007 read/write routine parameter") Signed-off-by: Daniil Dulov Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/dvb-usb-v2/az6007.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb-v2/az6007.c b/drivers/media/usb/dvb-usb-v2/az6007.c index 62ee09f28a0b..7524c90f5da6 100644 --- a/drivers/media/usb/dvb-usb-v2/az6007.c +++ b/drivers/media/usb/dvb-usb-v2/az6007.c @@ -202,7 +202,8 @@ static int az6007_rc_query(struct dvb_usb_device *d) unsigned code; enum rc_proto proto; - az6007_read(d, AZ6007_READ_IR, 0, 0, st->data, 10); + if (az6007_read(d, AZ6007_READ_IR, 0, 0, st->data, 10) < 0) + return -EIO; if (st->data[1] == 0x44) return 0; From 300388887cbba2bb92e2d81dc20448cd67e06391 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 18 May 2023 15:36:49 +0200 Subject: [PATCH 224/509] media: videodev2.h: Fix struct v4l2_input tuner index comment [ Upstream commit 26ae58f65e64fa7ba61d64bae752e59e08380c6a ] VIDIOC_ENUMINPUT documentation describes the tuner field of struct v4l2_input as index: Documentation/userspace-api/media/v4l/vidioc-enuminput.rst " * - __u32 - ``tuner`` - Capture devices can have zero or more tuners (RF demodulators). When the ``type`` is set to ``V4L2_INPUT_TYPE_TUNER`` this is an RF connector and this field identifies the tuner. It corresponds to struct :c:type:`v4l2_tuner` field ``index``. For details on tuners see :ref:`tuner`. " Drivers I could find also use the 'tuner' field as an index, e.g.: drivers/media/pci/bt8xx/bttv-driver.c bttv_enum_input() drivers/media/usb/go7007/go7007-v4l2.c vidioc_enum_input() However, the UAPI comment claims this field is 'enum v4l2_tuner_type': include/uapi/linux/videodev2.h This field being 'enum v4l2_tuner_type' is unlikely as it seems to be never used that way in drivers, and documentation confirms it. It seem this comment got in accidentally in the commit which this patch fixes. Fix the UAPI comment to stop confusion. This was pointed out by Dmitry while reviewing VIDIOC_ENUMINPUT support for strace. Fixes: 6016af82eafc ("[media] v4l2: use __u32 rather than enums in ioctl() structs") Signed-off-by: Marek Vasut Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index b28817c59fdf..55b8c4b82479 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -1644,7 +1644,7 @@ struct v4l2_input { __u8 name[32]; /* Label */ __u32 type; /* Type of input */ __u32 audioset; /* Associated audios (bitfield) */ - __u32 tuner; /* enum v4l2_tuner_type */ + __u32 tuner; /* Tuner index */ v4l2_std_id std; __u32 status; __u32 capabilities; From d87ef4e857b790f1616809eccda6b4d0c9c3da11 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 23 May 2023 07:59:32 +0800 Subject: [PATCH 225/509] media: usb: siano: Fix warning due to null work_func_t function pointer [ Upstream commit 6f489a966fbeb0da63d45c2c66a8957eab604bf6 ] The previous commit ebad8e731c1c ("media: usb: siano: Fix use after free bugs caused by do_submit_urb") adds cancel_work_sync() in smsusb_stop_streaming(). But smsusb_stop_streaming() may be called, even if the work_struct surb->wq has not been initialized. As a result, the warning will occur. One of the processes that could lead to warning is shown below: smsusb_probe() smsusb_init_device() if (!dev->in_ep || !dev->out_ep || align < 0) { smsusb_term_device(intf); smsusb_stop_streaming() cancel_work_sync(&dev->surbs[i].wq); __cancel_work_timer() __flush_work() if (WARN_ON(!work->func)) // work->func is null The log reported by syzbot is shown below: WARNING: CPU: 0 PID: 897 at kernel/workqueue.c:3066 __flush_work+0x798/0xa80 kernel/workqueue.c:3063 Modules linked in: CPU: 0 PID: 897 Comm: kworker/0:2 Not tainted 6.2.0-rc1-syzkaller #0 RIP: 0010:__flush_work+0x798/0xa80 kernel/workqueue.c:3066 ... RSP: 0018:ffffc9000464ebf8 EFLAGS: 00010246 RAX: 1ffff11002dbb420 RBX: 0000000000000021 RCX: 1ffffffff204fa4e RDX: dffffc0000000000 RSI: 0000000000000001 RDI: ffff888016dda0e8 RBP: ffffc9000464ed98 R08: 0000000000000001 R09: ffffffff90253b2f R10: 0000000000000001 R11: 0000000000000000 R12: ffff888016dda0e8 R13: ffff888016dda0e8 R14: ffff888016dda100 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd4331efe8 CR3: 000000000b48e000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __cancel_work_timer+0x315/0x460 kernel/workqueue.c:3160 smsusb_stop_streaming drivers/media/usb/siano/smsusb.c:182 [inline] smsusb_term_device+0xda/0x2d0 drivers/media/usb/siano/smsusb.c:344 smsusb_init_device+0x400/0x9ce drivers/media/usb/siano/smsusb.c:419 smsusb_probe+0xbbd/0xc55 drivers/media/usb/siano/smsusb.c:567 ... This patch adds check before cancel_work_sync(). If surb->wq has not been initialized, the cancel_work_sync() will not be executed. Reported-by: syzbot+27b0b464864741b18b99@syzkaller.appspotmail.com Fixes: ebad8e731c1c ("media: usb: siano: Fix use after free bugs caused by do_submit_urb") Signed-off-by: Duoming Zhou Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/usb/siano/smsusb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c index 1babfe6e2c36..5c223b5498b4 100644 --- a/drivers/media/usb/siano/smsusb.c +++ b/drivers/media/usb/siano/smsusb.c @@ -179,7 +179,8 @@ static void smsusb_stop_streaming(struct smsusb_device_t *dev) for (i = 0; i < MAX_URBS; i++) { usb_kill_urb(&dev->surbs[i].urb); - cancel_work_sync(&dev->surbs[i].wq); + if (dev->surbs[i].wq.func) + cancel_work_sync(&dev->surbs[i].wq); if (dev->surbs[i].cb) { smscore_putbuffer(dev->coredev, dev->surbs[i].cb); From 35fd1a213fa4435f8941f27782b5315a93fb915b Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Jul 2022 15:41:29 +0200 Subject: [PATCH 226/509] clk: qcom: reset: Allow specifying custom reset delay [ Upstream commit 2cb8a39b6781ea23accd1fa93b3ad000d0948aec ] The amount of time required between asserting and deasserting the reset signal can vary depending on the involved hardware component. Sometimes 1 us might not be enough and a larger delay is necessary to conform to the specifications. Usually this is worked around in the consuming drivers, by replacing reset_control_reset() with a sequence of reset_control_assert(), waiting for a custom delay, followed by reset_control_deassert(). However, in some cases the driver making use of the reset is generic and can be used with different reset controllers. In this case the reset time requirement is better handled directly by the reset controller driver. Make this possible by adding an "udelay" field to the qcom_reset_map that allows setting a different reset delay (in microseconds). Signed-off-by: Stephan Gerhold Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220706134132.3623415-4-stephan.gerhold@kernkonzept.com Stable-dep-of: 349b5bed539b ("clk: qcom: ipq6018: fix networking resets") Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 4 +++- drivers/clk/qcom/reset.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index 819d194be8f7..2a16adb572d2 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -13,8 +13,10 @@ static int qcom_reset(struct reset_controller_dev *rcdev, unsigned long id) { + struct qcom_reset_controller *rst = to_qcom_reset_controller(rcdev); + rcdev->ops->assert(rcdev, id); - udelay(1); + udelay(rst->reset_map[id].udelay ?: 1); /* use 1 us as default */ rcdev->ops->deassert(rcdev, id); return 0; } diff --git a/drivers/clk/qcom/reset.h b/drivers/clk/qcom/reset.h index 2a08b5e282c7..b8c113582072 100644 --- a/drivers/clk/qcom/reset.h +++ b/drivers/clk/qcom/reset.h @@ -11,6 +11,7 @@ struct qcom_reset_map { unsigned int reg; u8 bit; + u8 udelay; }; struct regmap; From ee3f494cfc3e6b6102c5ae7fc84b600bc6c008fd Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Mon, 7 Nov 2022 14:28:59 +0100 Subject: [PATCH 227/509] clk: qcom: reset: support resetting multiple bits [ Upstream commit 4a5210893625f89723ea210d7c630b730abb37ad ] This patch adds the support for giving the complete bitmask in reset structure and reset operation will use this bitmask for all reset operations. Currently, reset structure only takes a single bit for each reset and then calculates the bitmask by using the BIT() macro. However, this is not sufficient anymore for newer SoC-s like IPQ8074, IPQ6018 and more, since their networking resets require multiple bits to be asserted in order to properly reset the HW block completely. So, in order to allow asserting multiple bits add "bitmask" field to qcom_reset_map, and then use that bitmask value if its populated in the driver, if its not populated, then we just default to existing behaviour and calculate the bitmask on the fly. Signed-off-by: Robert Marko Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221107132901.489240-1-robimarko@gmail.com Stable-dep-of: 349b5bed539b ("clk: qcom: ipq6018: fix networking resets") Signed-off-by: Sasha Levin --- drivers/clk/qcom/reset.c | 4 ++-- drivers/clk/qcom/reset.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/reset.c b/drivers/clk/qcom/reset.c index 2a16adb572d2..0e914ec7aeae 100644 --- a/drivers/clk/qcom/reset.c +++ b/drivers/clk/qcom/reset.c @@ -30,7 +30,7 @@ qcom_reset_assert(struct reset_controller_dev *rcdev, unsigned long id) rst = to_qcom_reset_controller(rcdev); map = &rst->reset_map[id]; - mask = BIT(map->bit); + mask = map->bitmask ? map->bitmask : BIT(map->bit); return regmap_update_bits(rst->regmap, map->reg, mask, mask); } @@ -44,7 +44,7 @@ qcom_reset_deassert(struct reset_controller_dev *rcdev, unsigned long id) rst = to_qcom_reset_controller(rcdev); map = &rst->reset_map[id]; - mask = BIT(map->bit); + mask = map->bitmask ? map->bitmask : BIT(map->bit); return regmap_update_bits(rst->regmap, map->reg, mask, 0); } diff --git a/drivers/clk/qcom/reset.h b/drivers/clk/qcom/reset.h index b8c113582072..9a47c838d9b1 100644 --- a/drivers/clk/qcom/reset.h +++ b/drivers/clk/qcom/reset.h @@ -12,6 +12,7 @@ struct qcom_reset_map { unsigned int reg; u8 bit; u8 udelay; + u32 bitmask; }; struct regmap; From bdce16c1e650106bead98171adac36cfc7f339d8 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Fri, 26 May 2023 21:08:55 +0200 Subject: [PATCH 228/509] clk: qcom: ipq6018: fix networking resets [ Upstream commit 349b5bed539b491b7894a5186a895751fd8ba6c7 ] Networking resets in IPQ6018 all use bitmask as they require multiple bits to be set and cleared instead of a single bit. So, current networking resets have the same register and bit 0 set which is clearly incorrect. Fixes: d9db07f088af ("clk: qcom: Add ipq6018 Global Clock Controller support") Signed-off-by: Robert Marko Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230526190855.2941291-2-robimarko@gmail.com Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq6018.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/clk/qcom/gcc-ipq6018.c b/drivers/clk/qcom/gcc-ipq6018.c index 5c5d1b04ea7a..cde62a11f573 100644 --- a/drivers/clk/qcom/gcc-ipq6018.c +++ b/drivers/clk/qcom/gcc-ipq6018.c @@ -4517,24 +4517,24 @@ static const struct qcom_reset_map gcc_ipq6018_resets[] = { [GCC_PCIE0_AHB_ARES] = { 0x75040, 5 }, [GCC_PCIE0_AXI_MASTER_STICKY_ARES] = { 0x75040, 6 }, [GCC_PCIE0_AXI_SLAVE_STICKY_ARES] = { 0x75040, 7 }, - [GCC_PPE_FULL_RESET] = { 0x68014, 0 }, - [GCC_UNIPHY0_SOFT_RESET] = { 0x56004, 0 }, + [GCC_PPE_FULL_RESET] = { .reg = 0x68014, .bitmask = 0xf0000 }, + [GCC_UNIPHY0_SOFT_RESET] = { .reg = 0x56004, .bitmask = 0x3ff2 }, [GCC_UNIPHY0_XPCS_RESET] = { 0x56004, 2 }, - [GCC_UNIPHY1_SOFT_RESET] = { 0x56104, 0 }, + [GCC_UNIPHY1_SOFT_RESET] = { .reg = 0x56104, .bitmask = 0x32 }, [GCC_UNIPHY1_XPCS_RESET] = { 0x56104, 2 }, - [GCC_EDMA_HW_RESET] = { 0x68014, 0 }, - [GCC_NSSPORT1_RESET] = { 0x68014, 0 }, - [GCC_NSSPORT2_RESET] = { 0x68014, 0 }, - [GCC_NSSPORT3_RESET] = { 0x68014, 0 }, - [GCC_NSSPORT4_RESET] = { 0x68014, 0 }, - [GCC_NSSPORT5_RESET] = { 0x68014, 0 }, - [GCC_UNIPHY0_PORT1_ARES] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT2_ARES] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT3_ARES] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT4_ARES] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT5_ARES] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT_4_5_RESET] = { 0x56004, 0 }, - [GCC_UNIPHY0_PORT_4_RESET] = { 0x56004, 0 }, + [GCC_EDMA_HW_RESET] = { .reg = 0x68014, .bitmask = 0x300000 }, + [GCC_NSSPORT1_RESET] = { .reg = 0x68014, .bitmask = 0x1000003 }, + [GCC_NSSPORT2_RESET] = { .reg = 0x68014, .bitmask = 0x200000c }, + [GCC_NSSPORT3_RESET] = { .reg = 0x68014, .bitmask = 0x4000030 }, + [GCC_NSSPORT4_RESET] = { .reg = 0x68014, .bitmask = 0x8000300 }, + [GCC_NSSPORT5_RESET] = { .reg = 0x68014, .bitmask = 0x10000c00 }, + [GCC_UNIPHY0_PORT1_ARES] = { .reg = 0x56004, .bitmask = 0x30 }, + [GCC_UNIPHY0_PORT2_ARES] = { .reg = 0x56004, .bitmask = 0xc0 }, + [GCC_UNIPHY0_PORT3_ARES] = { .reg = 0x56004, .bitmask = 0x300 }, + [GCC_UNIPHY0_PORT4_ARES] = { .reg = 0x56004, .bitmask = 0xc00 }, + [GCC_UNIPHY0_PORT5_ARES] = { .reg = 0x56004, .bitmask = 0x3000 }, + [GCC_UNIPHY0_PORT_4_5_RESET] = { .reg = 0x56004, .bitmask = 0x3c02 }, + [GCC_UNIPHY0_PORT_4_RESET] = { .reg = 0x56004, .bitmask = 0xc02 }, [GCC_LPASS_BCR] = {0x1F000, 0}, [GCC_UBI32_TBU_BCR] = {0x65000, 0}, [GCC_LPASS_TBU_BCR] = {0x6C000, 0}, From 74f8606ddfa450d2255b4e61472a7632def1e8c4 Mon Sep 17 00:00:00 2001 From: Vladislav Efanov Date: Wed, 17 May 2023 20:25:18 +0300 Subject: [PATCH 229/509] usb: dwc3: qcom: Fix potential memory leak [ Upstream commit 097fb3ee710d4de83b8d4f5589e8ee13e0f0541e ] Function dwc3_qcom_probe() allocates memory for resource structure which is pointed by parent_res pointer. This memory is not freed. This leads to memory leak. Use stack memory to prevent memory leak. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 2bc02355f8ba ("usb: dwc3: qcom: Add support for booting with ACPI") Signed-off-by: Vladislav Efanov Acked-by: Shawn Guo Link: https://lore.kernel.org/r/20230517172518.442591-1-VEfanov@ispras.ru Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-qcom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index dac13fe97811..5c66efb05d7f 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -722,6 +722,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct dwc3_qcom *qcom; struct resource *res, *parent_res = NULL; + struct resource local_res; int ret, i; bool ignore_pipe_clk; @@ -772,9 +773,8 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (np) { parent_res = res; } else { - parent_res = kmemdup(res, sizeof(struct resource), GFP_KERNEL); - if (!parent_res) - return -ENOMEM; + memcpy(&local_res, res, sizeof(struct resource)); + parent_res = &local_res; parent_res->start = res->start + qcom->acpi_pdata->qscratch_base_offset; From 2788a3553f7497075653210b42e2aeb6ba95e28e Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Fri, 5 May 2023 14:48:37 +0530 Subject: [PATCH 230/509] usb: gadget: u_serial: Add null pointer check in gserial_suspend [ Upstream commit 2f6ecb89fe8feb2b60a53325b0eeb9866d88909a ] Consider a case where gserial_disconnect has already cleared gser->ioport. And if gserial_suspend gets called afterwards, it will lead to accessing of gser->ioport and thus causing null pointer dereference. Avoid this by adding a null pointer check. Added a static spinlock to prevent gser->ioport from becoming null after the newly added null pointer check. Fixes: aba3a8d01d62 ("usb: gadget: u_serial: add suspend resume callbacks") Signed-off-by: Prashanth K Link: https://lore.kernel.org/r/1683278317-11774-1-git-send-email-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_serial.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index 7b54e814aefb..3b5a6430e241 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1421,10 +1421,19 @@ EXPORT_SYMBOL_GPL(gserial_disconnect); void gserial_suspend(struct gserial *gser) { - struct gs_port *port = gser->ioport; + struct gs_port *port; unsigned long flags; - spin_lock_irqsave(&port->port_lock, flags); + spin_lock_irqsave(&serial_port_lock, flags); + port = gser->ioport; + + if (!port) { + spin_unlock_irqrestore(&serial_port_lock, flags); + return; + } + + spin_lock(&port->port_lock); + spin_unlock(&serial_port_lock); port->suspended = true; spin_unlock_irqrestore(&port->port_lock, flags); } From dac7d7efcb545692a110df611c9c195737967451 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Mar 2023 16:39:52 +0200 Subject: [PATCH 231/509] extcon: Fix kernel doc of property fields to avoid warnings [ Upstream commit 7e77e0b7a9f4cdf91cb0950749b40c840ea63efc ] Kernel documentation has to be synchronized with a code, otherwise the validator is not happy: Function parameter or member 'usb_propval' not described in 'extcon_cable' Function parameter or member 'chg_propval' not described in 'extcon_cable' Function parameter or member 'jack_propval' not described in 'extcon_cable' Function parameter or member 'disp_propval' not described in 'extcon_cable' Describe the fields added in the past. Fixes: 067c1652e7a7 ("extcon: Add the support for extcon property according to extcon type") Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/extcon/extcon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 356610404bb4..3bc83feb5a34 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -196,6 +196,10 @@ static const struct __extcon_info { * @attr_name: "name" sysfs entry * @attr_state: "state" sysfs entry * @attrs: the array pointing to attr_name and attr_state for attr_g + * @usb_propval: the array of USB connector properties + * @chg_propval: the array of charger connector properties + * @jack_propval: the array of jack connector properties + * @disp_propval: the array of display connector properties */ struct extcon_cable { struct extcon_dev *edev; From 6538e5d9f7eb90ea7cd50eb24ee18a205cfa9d00 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 22 Mar 2023 16:39:53 +0200 Subject: [PATCH 232/509] extcon: Fix kernel doc of property capability fields to avoid warnings [ Upstream commit 73346b9965ebda2feb7fef8629e9b28baee820e3 ] Kernel documentation has to be synchronized with a code, otherwise the validator is not happy: Function parameter or member 'usb_bits' not described in 'extcon_cable' Function parameter or member 'chg_bits' not described in 'extcon_cable' Function parameter or member 'jack_bits' not described in 'extcon_cable' Function parameter or member 'disp_bits' not described in 'extcon_cable' Describe the fields added in the past. Fixes: ceaa98f442cf ("extcon: Add the support for the capability of each property") Signed-off-by: Andy Shevchenko Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin --- drivers/extcon/extcon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 3bc83feb5a34..fa08dec389dc 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -200,6 +200,10 @@ static const struct __extcon_info { * @chg_propval: the array of charger connector properties * @jack_propval: the array of jack connector properties * @disp_propval: the array of display connector properties + * @usb_bits: the bit array of the USB connector property capabilities + * @chg_bits: the bit array of the charger connector property capabilities + * @jack_bits: the bit array of the jack connector property capabilities + * @disp_bits: the bit array of the display connector property capabilities */ struct extcon_cable { struct extcon_dev *edev; From 56901de563359de20513e16a9ae008ae2c22e9a9 Mon Sep 17 00:00:00 2001 From: Li Yang Date: Thu, 20 Apr 2023 22:08:31 +0800 Subject: [PATCH 233/509] usb: phy: phy-tahvo: fix memory leak in tahvo_usb_probe() [ Upstream commit 342161c11403ea00e9febc16baab1d883d589d04 ] Smatch reports: drivers/usb/phy/phy-tahvo.c: tahvo_usb_probe() warn: missing unwind goto? After geting irq, if ret < 0, it will return without error handling to free memory. Just add error handling to fix this problem. Fixes: 0d45a1373e66 ("usb: phy: tahvo: add IRQ check") Signed-off-by: Li Yang Reviewed-by: Dongliang Mu Link: https://lore.kernel.org/r/20230420140832.9110-1-lidaxian@hust.edu.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/phy/phy-tahvo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-tahvo.c b/drivers/usb/phy/phy-tahvo.c index a3e043e3e4aa..d0672b671298 100644 --- a/drivers/usb/phy/phy-tahvo.c +++ b/drivers/usb/phy/phy-tahvo.c @@ -395,7 +395,7 @@ static int tahvo_usb_probe(struct platform_device *pdev) tu->irq = ret = platform_get_irq(pdev, 0); if (ret < 0) - return ret; + goto err_remove_phy; ret = request_threaded_irq(tu->irq, NULL, tahvo_usb_vbus_interrupt, IRQF_ONESHOT, "tahvo-vbus", tu); From d66ddb61fa23ed67a7aafc8cacab75e88dc62f49 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 22:17:42 +0200 Subject: [PATCH 234/509] usb: hide unused usbfs_notify_suspend/resume functions [ Upstream commit 8e6bd945e6dde64fbc60ec3fe252164493a8d3a2 ] The declaration is in an #ifdef, which causes warnings when building with 'make W=1' and without CONFIG_PM: drivers/usb/core/devio.c:742:6: error: no previous prototype for 'usbfs_notify_suspend' drivers/usb/core/devio.c:747:6: error: no previous prototype for 'usbfs_notify_resume' Use the same #ifdef check around the function definitions to avoid the warnings and slightly shrink the USB core. Fixes: 7794f486ed0b ("usbfs: Add ioctls for runtime power management") Signed-off-by: Arnd Bergmann Reviewed-by: Sebastian Reichel Acked-by: Alan Stern Link: https://lore.kernel.org/r/20230516202103.558301-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/core/devio.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 2fe29319de44..1b95035d179f 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -734,6 +734,7 @@ static int driver_resume(struct usb_interface *intf) return 0; } +#ifdef CONFIG_PM /* The following routines apply to the entire device, not interfaces */ void usbfs_notify_suspend(struct usb_device *udev) { @@ -752,6 +753,7 @@ void usbfs_notify_resume(struct usb_device *udev) } mutex_unlock(&usbfs_mutex); } +#endif struct usb_driver usbfs_driver = { .name = "usbfs", From 8d65d0a2bfd5f38d539a1aa9866421177b417380 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 25 May 2023 11:37:54 +0206 Subject: [PATCH 235/509] serial: 8250: lock port for stop_rx() in omap8250_irq() [ Upstream commit ca73a892c5bec4b08a2fa22b3015e98ed905abb7 ] The uarts_ops stop_rx() callback expects that the port->lock is taken and interrupts are disabled. Fixes: 1fe0e1fa3209 ("serial: 8250_omap: Handle optional overrun-throttle-ms property") Signed-off-by: John Ogness Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20230525093159.223817-4-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 6b255e1633fd..6043d4fa08cc 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -655,7 +655,9 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) up->ier = port->serial_in(port, UART_IER); if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + spin_lock(&port->lock); port->ops->stop_rx(port); + spin_unlock(&port->lock); } else { /* Keep restarting the timer until * the input overrun subsides. From a59f64a83516a2507920dd86ba3beee35d71cc6d Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 25 May 2023 11:37:58 +0206 Subject: [PATCH 236/509] serial: 8250: lock port for UART_IER access in omap8250_irq() [ Upstream commit 25614735a647693c1260f253dc3ab32127697806 ] omap8250_irq() accesses UART_IER. This register is modified twice by each console write (serial8250_console_write()) under the port lock. omap8250_irq() must also take the port lock to guanentee synchronized access to UART_IER. Since the port lock is already being taken for the stop_rx() callback and since it is safe to call cancel_delayed_work() while holding the port lock, simply extend the port lock region to include UART_IER access. Fixes: 1fe0e1fa3209 ("serial: 8250_omap: Handle optional overrun-throttle-ms property") Signed-off-by: John Ogness Reviewed-by: Tony Lindgren Link: https://lore.kernel.org/r/20230525093159.223817-8-john.ogness@linutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 6043d4fa08cc..af39a2c4c2ee 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -653,17 +653,18 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id) if ((lsr & UART_LSR_OE) && up->overrun_backoff_time_ms > 0) { unsigned long delay; + /* Synchronize UART_IER access against the console. */ + spin_lock(&port->lock); up->ier = port->serial_in(port, UART_IER); if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { - spin_lock(&port->lock); port->ops->stop_rx(port); - spin_unlock(&port->lock); } else { /* Keep restarting the timer until * the input overrun subsides. */ cancel_delayed_work(&up->overrun_backoff); } + spin_unlock(&port->lock); delay = msecs_to_jiffies(up->overrun_backoff_time_ms); schedule_delayed_work(&up->overrun_backoff, delay); From 018eddcb6beff0c7e4f417a4cf05122b6c6d57db Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 23 May 2023 10:40:17 +0800 Subject: [PATCH 237/509] kernfs: fix missing kernfs_idr_lock to remove an ID from the IDR [ Upstream commit 30480b988f88c279752f3202a26b6fee5f586aef ] The root->ino_idr is supposed to be protected by kernfs_idr_lock, fix it. Fixes: 488dee96bb62 ("kernfs: allow creating kernfs objects with arbitrary uid/gid") Signed-off-by: Muchun Song Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230523024017.24851-1-songmuchun@bytedance.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/kernfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8b3c86a502da..c91ee05cce74 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -679,7 +679,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, return kn; err_out3: + spin_lock(&kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); + spin_unlock(&kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: From e52019c095352af3d0fd2b2416c1307a54939337 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 25 Apr 2023 15:35:28 +0100 Subject: [PATCH 238/509] coresight: Fix loss of connection info when a module is unloaded [ Upstream commit c45b2835e7b205783bdfe08cc98fa86a7c5eeb74 ] child_fwnode should be a read only property based on the DT or ACPI. If it's cleared on the parent device when a child is unloaded, then when the child is loaded again the connection won't be remade. child_dev should be cleared instead which signifies that the connection should be remade when the child_fwnode registers a new coresight_device. Similarly the reference count shouldn't be decremented as long as the parent device exists. The correct place to drop the reference is in coresight_release_platform_data() which is already done. Reproducible on Juno with the following steps: # load all coresight modules. $ cd /sys/bus/coresight/devices/ $ echo 1 > tmc_etr0/enable_sink $ echo 1 > etm0/enable_source # Works fine ^ $ echo 0 > etm0/enable_source $ rmmod coresight-funnel $ modprobe coresight-funnel $ echo 1 > etm0/enable_source -bash: echo: write error: Invalid argument Fixes: 37ea1ffddffa ("coresight: Use fwnode handle instead of device names") Fixes: 2af89ebacf29 ("coresight: Clear the connection field properly") Tested-by: Suzuki K Poulose Reviewed-by: Mike Leach Signed-off-by: James Clark Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230425143542.2305069-2-james.clark@arm.com Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-core.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index 5ddc8103503b..c4b805b04531 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1376,13 +1376,8 @@ static int coresight_remove_match(struct device *dev, void *data) if (csdev->dev.fwnode == conn->child_fwnode) { iterator->orphan = true; coresight_remove_links(iterator, conn); - /* - * Drop the reference to the handle for the remote - * device acquired in parsing the connections from - * platform data. - */ - fwnode_handle_put(conn->child_fwnode); - conn->child_fwnode = NULL; + + conn->child_dev = NULL; /* No need to continue */ break; } From 02b22660231db3c1a55d6709c5b9db165d0794c7 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 15 May 2023 22:57:10 +0200 Subject: [PATCH 239/509] mfd: rt5033: Drop rt5033-battery sub-device [ Upstream commit 43db1344e0f8c1eb687a1d6cd5b0de3009ab66cb ] The fuel gauge in the RT5033 PMIC (rt5033-battery) has its own I2C bus and interrupt lines. Therefore, it is not part of the MFD device and needs to be specified separately in the device tree. Fixes: 0b271258544b ("mfd: rt5033: Add Richtek RT5033 driver core.") Signed-off-by: Stephan Gerhold Signed-off-by: Jakob Hauser Reviewed-by: Linus Walleij Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/6a8a19bc67b5be3732882e8131ad2ffcb546ac03.1684182964.git.jahau@rocketmail.com Signed-off-by: Sasha Levin --- drivers/mfd/rt5033.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mfd/rt5033.c b/drivers/mfd/rt5033.c index 48381d9bf740..302115dabff4 100644 --- a/drivers/mfd/rt5033.c +++ b/drivers/mfd/rt5033.c @@ -41,9 +41,6 @@ static const struct mfd_cell rt5033_devs[] = { { .name = "rt5033-charger", .of_compatible = "richtek,rt5033-charger", - }, { - .name = "rt5033-battery", - .of_compatible = "richtek,rt5033-battery", }, { .name = "rt5033-led", .of_compatible = "richtek,rt5033-led", From b754ea60e690ffae053e8da79bc78d2f9d1f6e1f Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sat, 12 Sep 2020 20:03:01 +0100 Subject: [PATCH 240/509] media: venus: helpers: Fix ALIGN() of non power of two [ Upstream commit 927e78ac8bc58155316cf6f46026e1912bbbbcfc ] ALIGN() expects its second argument to be a power of 2, otherwise incorrect results are produced for some inputs. The output can be both larger or smaller than what is expected. For example, ALIGN(304, 192) equals 320 instead of 384, and ALIGN(65, 192) equals 256 instead of 192. However, nestling two ALIGN() as is done in this case seem to only produce results equal to or bigger than the expected result if ALIGN() had handled non powers of two, and that in turn results in framesizes that are either the correct size or too large. Fortunately, since 192 * 4 / 3 equals 256, it turns out that one ALIGN() is sufficient. Fixes: ab1eda449c6e ("media: venus: vdec: handle 10bit bitstreams") Signed-off-by: Rikard Falkeborn Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/platform/qcom/venus/helpers.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c index 5ca3920237c5..5fdce5f07364 100644 --- a/drivers/media/platform/qcom/venus/helpers.c +++ b/drivers/media/platform/qcom/venus/helpers.c @@ -917,8 +917,8 @@ static u32 get_framesize_raw_yuv420_tp10_ubwc(u32 width, u32 height) u32 extradata = SZ_16K; u32 size; - y_stride = ALIGN(ALIGN(width, 192) * 4 / 3, 256); - uv_stride = ALIGN(ALIGN(width, 192) * 4 / 3, 256); + y_stride = ALIGN(width * 4 / 3, 256); + uv_stride = ALIGN(width * 4 / 3, 256); y_sclines = ALIGN(height, 16); uv_sclines = ALIGN((height + 1) >> 1, 16); From 4e8e838fce5e34b81fd8c17560be7ca27b212660 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 26 May 2023 12:53:23 +0100 Subject: [PATCH 241/509] media: atomisp: gmin_platform: fix out_len in gmin_get_config_dsm_var() [ Upstream commit 1657f2934daf89e8d9fa4b2697008909eb22c73e ] Ideally, strlen(cur->string.pointer) and strlen(out) would be the same. But this code is using strscpy() to avoid a potential buffer overflow. So in the same way we should take the strlen() of the smaller string to avoid a buffer overflow in the caller, gmin_get_var_int(). Link: https://lore.kernel.org/r/26124bcd-8132-4483-9d67-225c87d424e8@kili.mountain Fixes: 387041cda44e ("media: atomisp: improve sensor detection code to use _DSM table") Signed-off-by: Dan Carpenter Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c index c9ee85037644..f0387486eb17 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c +++ b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c @@ -1198,7 +1198,7 @@ static int gmin_get_config_dsm_var(struct device *dev, dev_info(dev, "found _DSM entry for '%s': %s\n", var, cur->string.pointer); strscpy(out, cur->string.pointer, *out_len); - *out_len = strlen(cur->string.pointer); + *out_len = strlen(out); ACPI_FREE(obj); return 0; From 96898fb476d17704149d9f23716a399b6efbb522 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Fri, 24 Mar 2023 15:54:23 +0100 Subject: [PATCH 242/509] KVM: s390: fix KVM_S390_GET_CMMA_BITS for GFNs in memslot holes [ Upstream commit 285cff4c0454340a4dc53f46e67f2cb1c293bd74 ] The KVM_S390_GET_CMMA_BITS ioctl may return incorrect values when userspace specifies a start_gfn outside of memslots. This can occur when a VM has multiple memslots with a hole in between: +-----+----------+--------+--------+ | ... | Slot N-1 | | Slot N | +-----+----------+--------+--------+ ^ ^ ^ ^ | | | | GFN A A+B | | A+B+C | A+B+C+D When userspace specifies a GFN in [A+B, A+B+C), it would expect to get the CMMA values of the first dirty page in Slot N. However, userspace may get a start_gfn of A+B+C+D with a count of 0, hence completely skipping over any dirty pages in slot N. The error is in kvm_s390_next_dirty_cmma(), which assumes gfn_to_memslot_approx() will return the memslot _below_ the specified GFN when the specified GFN lies outside a memslot. In reality it may return either the memslot below or above the specified GFN. When a memslot above the specified GFN is returned this happens: - ofs is calculated, but since the memslot's base_gfn is larger than the specified cur_gfn, ofs will underflow to a huge number. - ofs is passed to find_next_bit(). Since ofs will exceed the memslot's number of pages, the number of pages in the memslot is returned, completely skipping over all bits in the memslot userspace would be interested in. Fix this by resetting ofs to zero when a memslot _above_ cur_gfn is returned (cur_gfn < ms->base_gfn). Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Fixes: afdad61615cc ("KVM: s390: Fix storage attributes migration with memory slots") Message-Id: <20230324145424.293889-2-nrb@linux.ibm.com> Signed-off-by: Claudio Imbrenda Signed-off-by: Janosch Frank Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7ffc73ba220f..7a326d03087a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2005,6 +2005,10 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, ms = slots->memslots + slotidx; ofs = 0; } + + if (cur_gfn < ms->base_gfn) + ofs = 0; + ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); while ((slotidx > 0) && (ofs >= ms->npages)) { slotidx--; From a6171452085bad41d13e68dcb58d3ccb16c75552 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 4 Jun 2023 17:04:37 +0200 Subject: [PATCH 243/509] usb: dwc3: qcom: Release the correct resources in dwc3_qcom_remove() [ Upstream commit 8fd95da2cfb5046c4bb5a3cdc9eb7963ba8b10dd ] In the probe, some resources are allocated with dwc3_qcom_of_register_core() or dwc3_qcom_acpi_register_core(). The corresponding resources are already coorectly freed in the error handling path of the probe, but not in the remove function. Fix it. Fixes: 2bc02355f8ba ("usb: dwc3: qcom: Add support for booting with ACPI") Signed-off-by: Christophe JAILLET Reviewed-by: Andrew Halaney Message-ID: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-qcom.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 5c66efb05d7f..495d8bb8f156 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -869,10 +869,14 @@ static int dwc3_qcom_probe(struct platform_device *pdev) static int dwc3_qcom_remove(struct platform_device *pdev) { struct dwc3_qcom *qcom = platform_get_drvdata(pdev); + struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; int i; - of_platform_depopulate(dev); + if (np) + of_platform_depopulate(&pdev->dev); + else + platform_device_put(pdev); for (i = qcom->num_clocks - 1; i >= 0; i--) { clk_disable_unprepare(qcom->clks[i]); From 0e9e127835c8ac157d9110be203ebfd639f526ef Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 4 Jun 2023 16:56:34 +0200 Subject: [PATCH 244/509] usb: dwc3: qcom: Fix an error handling path in dwc3_qcom_probe() [ Upstream commit 4a944da707123686d372ec01ea60056902fadf35 ] If dwc3_qcom_create_urs_usb_platdev() fails, some resources still need to be released, as already done in the other error handling path of the probe. Fixes: c25c210f590e ("usb: dwc3: qcom: add URS Host support for sdm845 ACPI boot") Signed-off-by: Christophe JAILLET Reviewed-by: Andrew Halaney Message-ID: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-qcom.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 495d8bb8f156..ec8c43231746 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -786,9 +786,10 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(qcom->urs_usb)) { dev_err(dev, "failed to create URS USB platdev\n"); if (!qcom->urs_usb) - return -ENODEV; + ret = -ENODEV; else - return PTR_ERR(qcom->urs_usb); + ret = PTR_ERR(qcom->urs_usb); + goto clk_disable; } } } From 7ade555ac58d12d1de6c5b4c5a79d631b1d06233 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Wed, 31 May 2023 20:11:14 +0530 Subject: [PATCH 245/509] usb: common: usb-conn-gpio: Set last role to unknown before initial detection [ Upstream commit edd60d24bd858cef165274e4cd6cab43bdc58d15 ] Currently if we bootup a device without cable connected, then usb-conn-gpio won't call set_role() since last_role is same as current role. This happens because during probe last_role gets initialised to zero. To avoid this, added a new constant in enum usb_role, last_role is set to USB_ROLE_UNKNOWN before performing initial detection. While at it, also handle default case for the usb_role switch in cdns3, intel-xhci-usb-role-switch & musb/jz4740 to avoid build warnings. Fixes: 4602f3bff266 ("usb: common: add USB GPIO based connection detection driver") Signed-off-by: Prashanth K Reviewed-by: AngeloGioacchino Del Regno Message-ID: <1685544074-17337-1-git-send-email-quic_prashk@quicinc.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/cdns3/core.c | 2 ++ drivers/usb/common/usb-conn-gpio.c | 3 +++ drivers/usb/musb/jz4740.c | 2 ++ drivers/usb/roles/intel-xhci-usb-role-switch.c | 2 ++ include/linux/usb/role.h | 1 + 5 files changed, 10 insertions(+) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index 8fe7420de033..e5fe640b2bb0 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -243,6 +243,8 @@ static enum usb_role cdns3_hw_role_state_machine(struct cdns3 *cdns) if (!vbus) role = USB_ROLE_NONE; break; + default: + break; } dev_dbg(cdns->dev, "role %d -> %d\n", cdns->role, role); diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c index c9545a4eff66..5754e467c16a 100644 --- a/drivers/usb/common/usb-conn-gpio.c +++ b/drivers/usb/common/usb-conn-gpio.c @@ -276,6 +276,9 @@ static int usb_conn_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); + /* Set last role to unknown before performing the initial detection */ + info->last_role = USB_ROLE_UNKNOWN; + /* Perform initial detection */ usb_conn_queue_dwork(info, 0); diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c index c4fe1f4cd17a..f283629091ec 100644 --- a/drivers/usb/musb/jz4740.c +++ b/drivers/usb/musb/jz4740.c @@ -91,6 +91,8 @@ static int jz4740_musb_role_switch_set(struct usb_role_switch *sw, case USB_ROLE_HOST: atomic_notifier_call_chain(&phy->notifier, USB_EVENT_ID, phy); break; + default: + break; } return 0; diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c index 5c96e929acea..4d6a3dd06e01 100644 --- a/drivers/usb/roles/intel-xhci-usb-role-switch.c +++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -97,6 +97,8 @@ static int intel_xhci_usb_set_role(struct usb_role_switch *sw, val |= SW_VBUS_VALID; drd_config = DRD_CONFIG_STATIC_DEVICE; break; + default: + break; } val |= SW_IDPIN_EN; if (data->enable_sw_switch) { diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index b9ccaeb8a4ae..aecfce46d354 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -11,6 +11,7 @@ enum usb_role { USB_ROLE_NONE, USB_ROLE_HOST, USB_ROLE_DEVICE, + USB_ROLE_UNKNOWN, }; typedef int (*usb_role_switch_set_t)(struct usb_role_switch *sw, From 1dc07edc01d26880065b17aba55f2d9884171198 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 15:32:52 +0200 Subject: [PATCH 246/509] usb: dwc3-meson-g12a: Fix an error handling path in dwc3_meson_g12a_probe() [ Upstream commit 01052b91c9808e3c3b068ae2721cb728ec9aa4c0 ] If dwc3_meson_g12a_otg_init() fails, resources allocated by the previous of_platform_populate() call should be released, as already done in the error handling path. Fixes: 1e355f21d3fb ("usb: dwc3: Add Amlogic A1 DWC3 glue") Signed-off-by: Christophe JAILLET Reviewed-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Message-ID: <9d28466de1808ccc756b4cc25fc72c482d133d13.1686403934.git.christophe.jaillet@wanadoo.fr> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-meson-g12a.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index d0f9b7c296b0..69ec06efd7f2 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -805,7 +805,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = dwc3_meson_g12a_otg_init(pdev, priv); if (ret) - goto err_phys_power; + goto err_plat_depopulate; pm_runtime_set_active(dev); pm_runtime_enable(dev); @@ -813,6 +813,9 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) return 0; +err_plat_depopulate: + of_platform_depopulate(dev); + err_phys_power: for (i = 0 ; i < PHY_COUNT ; ++i) phy_power_off(priv->phys[i]); From a81e1f22e17f4e18a119f07d3cdf3ec906621113 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Fri, 9 Jun 2023 09:48:18 +0800 Subject: [PATCH 247/509] mfd: intel-lpss: Add missing check for platform_get_resource [ Upstream commit d918e0d5824495a75d00b879118b098fcab36fdb ] Add the missing check for platform_get_resource and return error if it fails. Fixes: 4b45efe85263 ("mfd: Add support for Intel Sunrisepoint LPSS devices") Signed-off-by: Jiasheng Jiang Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230609014818.28475-1-jiasheng@iscas.ac.cn Signed-off-by: Sasha Levin --- drivers/mfd/intel-lpss-acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c index 045cbf0cbe53..993e305a232c 100644 --- a/drivers/mfd/intel-lpss-acpi.c +++ b/drivers/mfd/intel-lpss-acpi.c @@ -114,6 +114,9 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev) return -ENOMEM; info->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!info->mem) + return -ENODEV; + info->irq = platform_get_irq(pdev, 0); ret = intel_lpss_probe(&pdev->dev, info); From 8e00ae25a37159e8fa619ade9204c58cdfa7370e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Jun 2023 11:30:35 +0200 Subject: [PATCH 248/509] Revert "usb: common: usb-conn-gpio: Set last role to unknown before initial detection" [ Upstream commit df49f2a0ac4a34c0cb4b5c233fcfa0add644c43c ] This reverts commit edd60d24bd858cef165274e4cd6cab43bdc58d15. Heikki reports that this should not be a global flag just to work around one broken driver and should be fixed differently, so revert it. Reported-by: Heikki Krogerus Fixes: edd60d24bd85 ("usb: common: usb-conn-gpio: Set last role to unknown before initial detection") Link: https://lore.kernel.org/r/ZImE4L3YgABnCIsP@kuha.fi.intel.com Cc: Prashanth K Cc: AngeloGioacchino Del Regno Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/cdns3/core.c | 2 -- drivers/usb/common/usb-conn-gpio.c | 3 --- drivers/usb/musb/jz4740.c | 2 -- drivers/usb/roles/intel-xhci-usb-role-switch.c | 2 -- include/linux/usb/role.h | 1 - 5 files changed, 10 deletions(-) diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c index e5fe640b2bb0..8fe7420de033 100644 --- a/drivers/usb/cdns3/core.c +++ b/drivers/usb/cdns3/core.c @@ -243,8 +243,6 @@ static enum usb_role cdns3_hw_role_state_machine(struct cdns3 *cdns) if (!vbus) role = USB_ROLE_NONE; break; - default: - break; } dev_dbg(cdns->dev, "role %d -> %d\n", cdns->role, role); diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c index 5754e467c16a..c9545a4eff66 100644 --- a/drivers/usb/common/usb-conn-gpio.c +++ b/drivers/usb/common/usb-conn-gpio.c @@ -276,9 +276,6 @@ static int usb_conn_probe(struct platform_device *pdev) platform_set_drvdata(pdev, info); - /* Set last role to unknown before performing the initial detection */ - info->last_role = USB_ROLE_UNKNOWN; - /* Perform initial detection */ usb_conn_queue_dwork(info, 0); diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c index f283629091ec..c4fe1f4cd17a 100644 --- a/drivers/usb/musb/jz4740.c +++ b/drivers/usb/musb/jz4740.c @@ -91,8 +91,6 @@ static int jz4740_musb_role_switch_set(struct usb_role_switch *sw, case USB_ROLE_HOST: atomic_notifier_call_chain(&phy->notifier, USB_EVENT_ID, phy); break; - default: - break; } return 0; diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c index 4d6a3dd06e01..5c96e929acea 100644 --- a/drivers/usb/roles/intel-xhci-usb-role-switch.c +++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -97,8 +97,6 @@ static int intel_xhci_usb_set_role(struct usb_role_switch *sw, val |= SW_VBUS_VALID; drd_config = DRD_CONFIG_STATIC_DEVICE; break; - default: - break; } val |= SW_IDPIN_EN; if (data->enable_sw_switch) { diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index aecfce46d354..b9ccaeb8a4ae 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -11,7 +11,6 @@ enum usb_role { USB_ROLE_NONE, USB_ROLE_HOST, USB_ROLE_DEVICE, - USB_ROLE_UNKNOWN, }; typedef int (*usb_role_switch_set_t)(struct usb_role_switch *sw, From 5b31ac1d6d889bdd2e7bd97cfa8b993323ae300b Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 14 Jun 2023 07:59:19 +0300 Subject: [PATCH 249/509] serial: 8250_omap: Use force_suspend and resume for system suspend [ Upstream commit 20a41a62618df85f3a2981008edec5cadd785e0a ] We should not rely on autosuspend timeout for system suspend. Instead, let's use force_suspend and force_resume functions. Otherwise the serial port controller device may not be idled on suspend. As we are doing a register write on suspend to configure the serial port, we still need to runtime PM resume the port on suspend. While at it, let's switch to pm_runtime_resume_and_get() and check for errors returned. And let's add the missing line break before return to the suspend function while at it. Fixes: 09d8b2bdbc5c ("serial: 8250: omap: Provide ability to enable/disable UART as wakeup source") Signed-off-by: Tony Lindgren Tested-by: Dhruva Gole Message-ID: <20230614045922.4798-1-tony@atomide.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/8250/8250_omap.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index af39a2c4c2ee..e26ac3f42e05 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -1521,25 +1521,35 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); + int err; serial8250_suspend_port(priv->line); - pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); + if (err) + return err; if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - pm_runtime_mark_last_busy(dev); - pm_runtime_put_autosuspend(dev); - + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); - return 0; + + return err; } static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); + int err; + err = pm_runtime_force_resume(dev); + if (err) + return err; serial8250_resume_port(priv->line); + /* Paired with pm_runtime_resume_and_get() in omap8250_suspend() */ + pm_runtime_mark_last_busy(dev); + pm_runtime_put_autosuspend(dev); + return 0; } #else From 4d2405147385e6e281585230c3e7a54464876ea5 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Tue, 6 Jun 2023 09:08:10 +0200 Subject: [PATCH 250/509] test_firmware: return ENOMEM instead of ENOSPC on failed memory allocation [ Upstream commit 7dae593cd226a0bca61201cf85ceb9335cf63682 ] In a couple of situations like name = kstrndup(buf, count, GFP_KERNEL); if (!name) return -ENOSPC; the error is not actually "No space left on device", but "Out of memory". It is semantically correct to return -ENOMEM in all failed kstrndup() and kzalloc() cases in this driver, as it is not a problem with disk space, but with kernel memory allocator failing allocation. The semantically correct should be: name = kstrndup(buf, count, GFP_KERNEL); if (!name) return -ENOMEM; Cc: Dan Carpenter Cc: Takashi Iwai Cc: Kees Cook Cc: "Luis R. Rodriguez" Cc: Scott Branden Cc: Hans de Goede Cc: Brian Norris Fixes: c92316bf8e948 ("test_firmware: add batched firmware tests") Fixes: 0a8adf584759c ("test: add firmware_class loader test") Fixes: 548193cba2a7d ("test_firmware: add support for firmware_request_platform") Fixes: eb910947c82f9 ("test: firmware_class: add asynchronous request trigger") Fixes: 061132d2b9c95 ("test_firmware: add test custom fallback trigger") Fixes: 7feebfa487b92 ("test_firmware: add support for request_firmware_into_buf") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Dan Carpenter Message-ID: <20230606070808.9300-1-mirsad.todorovac@alu.unizg.hr> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- lib/test_firmware.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/test_firmware.c b/lib/test_firmware.c index ed0455a9ded8..25dc9eb6c902 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -183,7 +183,7 @@ static int __kstrncpy(char **dst, const char *name, size_t count, gfp_t gfp) { *dst = kstrndup(name, count, gfp); if (!*dst) - return -ENOSPC; + return -ENOMEM; return count; } @@ -606,7 +606,7 @@ static ssize_t trigger_request_store(struct device *dev, name = kstrndup(buf, count, GFP_KERNEL); if (!name) - return -ENOSPC; + return -ENOMEM; pr_info("loading '%s'\n", name); @@ -654,7 +654,7 @@ static ssize_t trigger_request_platform_store(struct device *dev, name = kstrndup(buf, count, GFP_KERNEL); if (!name) - return -ENOSPC; + return -ENOMEM; pr_info("inserting test platform fw '%s'\n", name); efi_embedded_fw.name = name; @@ -707,7 +707,7 @@ static ssize_t trigger_async_request_store(struct device *dev, name = kstrndup(buf, count, GFP_KERNEL); if (!name) - return -ENOSPC; + return -ENOMEM; pr_info("loading '%s'\n", name); @@ -752,7 +752,7 @@ static ssize_t trigger_custom_fallback_store(struct device *dev, name = kstrndup(buf, count, GFP_KERNEL); if (!name) - return -ENOSPC; + return -ENOMEM; pr_info("loading '%s' using custom fallback mechanism\n", name); @@ -803,7 +803,7 @@ static int test_fw_run_batch_request(void *data) test_buf = kzalloc(TEST_FIRMWARE_BUF_SIZE, GFP_KERNEL); if (!test_buf) - return -ENOSPC; + return -ENOMEM; if (test_fw_config->partial) req->rc = request_partial_firmware_into_buf From 30ead8b9bf0d6fa9e760e8c1c6467fac4376d165 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 9 Jun 2023 11:28:03 +0200 Subject: [PATCH 251/509] mfd: stmfx: Fix error path in stmfx_chip_init [ Upstream commit f592cf624531286f8b52e40dcfc157a5a7fb115c ] In error path, disable vdd regulator if it exists, but don't overload ret. Because if regulator_disable() is successful, stmfx_chip_init will exit successfully while chip init failed. Fixes: 06252ade9156 ("mfd: Add ST Multi-Function eXpander (STMFX) core driver") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20230609092804.793100-1-amelie.delaunay@foss.st.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/stmfx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index 988e2ba6dd0f..41e74b5dd990 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -387,7 +387,7 @@ static int stmfx_chip_init(struct i2c_client *client) err: if (stmfx->vdd) - return regulator_disable(stmfx->vdd); + regulator_disable(stmfx->vdd); return ret; } From c5e2f6f2bb66a53e9f7283a5c5f2cc0a6ed7b3e9 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 9 Jun 2023 11:28:04 +0200 Subject: [PATCH 252/509] mfd: stmfx: Nullify stmfx->vdd in case of error [ Upstream commit 7c81582c0bccb4757186176f0ee12834597066ad ] Nullify stmfx->vdd in case devm_regulator_get_optional() returns an error. And simplify code by returning an error only if return code is not -ENODEV, which means there is no vdd regulator and it is not an issue. Fixes: d75846ed08e6 ("mfd: stmfx: Fix dev_err_probe() call in stmfx_chip_init()") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20230609092804.793100-2-amelie.delaunay@foss.st.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/stmfx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/mfd/stmfx.c b/drivers/mfd/stmfx.c index 41e74b5dd990..b45d7b0b842c 100644 --- a/drivers/mfd/stmfx.c +++ b/drivers/mfd/stmfx.c @@ -330,9 +330,8 @@ static int stmfx_chip_init(struct i2c_client *client) stmfx->vdd = devm_regulator_get_optional(&client->dev, "vdd"); ret = PTR_ERR_OR_ZERO(stmfx->vdd); if (ret) { - if (ret == -ENODEV) - stmfx->vdd = NULL; - else + stmfx->vdd = NULL; + if (ret != -ENODEV) return dev_err_probe(&client->dev, ret, "Failed to get VDD regulator\n"); } From 724448d6021d44a7aebaaf748dbe96104ac828c3 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Wed, 10 May 2023 17:42:58 +0200 Subject: [PATCH 253/509] KVM: s390: vsie: fix the length of APCB bitmap [ Upstream commit 246be7d2720ea9a795b576067ecc5e5c7a1e7848 ] bit_and() uses the count of bits as the woking length. Fix the previous implementation and effectively use the right bitmap size. Fixes: 19fd83a64718 ("KVM: s390: vsie: allow CRYCB FORMAT-1") Fixes: 56019f9aca22 ("KVM: s390: vsie: Allow CRYCB FORMAT-2") Signed-off-by: Pierre Morel Reviewed-by: Janosch Frank Link: https://lore.kernel.org/kvm/20230511094719.9691-1-pmorel@linux.ibm.com/ Signed-off-by: Janosch Frank Signed-off-by: Sasha Levin --- arch/s390/kvm/vsie.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ff58decfef5e..192eacc8fbb7 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -168,7 +168,8 @@ static int setup_apcb00(struct kvm_vcpu *vcpu, unsigned long *apcb_s, sizeof(struct kvm_s390_apcb0))) return -EFAULT; - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb0)); + bitmap_and(apcb_s, apcb_s, apcb_h, + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb0)); return 0; } @@ -190,7 +191,8 @@ static int setup_apcb11(struct kvm_vcpu *vcpu, unsigned long *apcb_s, sizeof(struct kvm_s390_apcb1))) return -EFAULT; - bitmap_and(apcb_s, apcb_s, apcb_h, sizeof(struct kvm_s390_apcb1)); + bitmap_and(apcb_s, apcb_s, apcb_h, + BITS_PER_BYTE * sizeof(struct kvm_s390_apcb1)); return 0; } From 442e1a98bd0220d1e17ec0f37052e636e2222d7f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 17 Jun 2023 12:43:16 +0200 Subject: [PATCH 254/509] mfd: stmpe: Only disable the regulators if they are enabled [ Upstream commit 104d32bd81f620bb9f67fbf7d1159c414e89f05f ] In stmpe_probe(), if some regulator_enable() calls fail, probing continues and there is only a dev_warn(). So, if stmpe_probe() is called the regulator may not be enabled. It is cleaner to test it before calling regulator_disable() in the remove function. Fixes: 9c9e321455fb ("mfd: stmpe: add optional regulators") Signed-off-by: Christophe JAILLET Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/8de3aaf297931d655b9ad6aed548f4de8b85425a.1686998575.git.christophe.jaillet@wanadoo.fr Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/mfd/stmpe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index 508349399f8a..7f758fb60c1f 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -1494,9 +1494,9 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum) int stmpe_remove(struct stmpe *stmpe) { - if (!IS_ERR(stmpe->vio)) + if (!IS_ERR(stmpe->vio) && regulator_is_enabled(stmpe->vio)) regulator_disable(stmpe->vio); - if (!IS_ERR(stmpe->vcc)) + if (!IS_ERR(stmpe->vcc) && regulator_is_enabled(stmpe->vcc)) regulator_disable(stmpe->vcc); __stmpe_disable(stmpe, STMPE_BLOCK_ADC); From e4845cdea71e280de71946577e7a47ba48dc84af Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 31 May 2023 10:39:50 +0300 Subject: [PATCH 255/509] phy: tegra: xusb: check return value of devm_kzalloc() [ Upstream commit 44faada0f38fc333d392af04c343b0e23f8f5d81 ] devm_kzalloc() returns a pointer to dynamically allocated memory. Pointer could be NULL in case allocation fails. Check pointer validity. Identified with coccinelle (kmerr.cocci script). Fixes: f67213cee2b3 ("phy: tegra: xusb: Add usb-role-switch support") Signed-off-by: Claudiu Beznea Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20230531073950.145339-1-claudiu.beznea@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/tegra/xusb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index f93be3c4a4a6..8f11b293c48d 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -663,6 +663,9 @@ static int tegra_xusb_setup_usb_role_switch(struct tegra_xusb_port *port) port->dev.driver = devm_kzalloc(&port->dev, sizeof(struct device_driver), GFP_KERNEL); + if (!port->dev.driver) + return -ENOMEM; + port->dev.driver->owner = THIS_MODULE; port->usb_role_sw = usb_role_switch_register(&port->dev, From 8a0413be8a1e352d15b8634e0551c3111a4f919f Mon Sep 17 00:00:00 2001 From: Fancy Fang Date: Fri, 5 May 2023 14:58:39 +0800 Subject: [PATCH 256/509] pwm: imx-tpm: force 'real_period' to be zero in suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 661dfb7f46298e53f6c3deaa772fa527aae86193 ] During suspend, all the tpm registers will lose values. So the 'real_period' value of struct 'imx_tpm_pwm_chip' should be forced to be zero to force the period update code can be executed after system resume back. Signed-off-by: Fancy Fang Signed-off-by: Clark Wang Acked-by: Uwe Kleine-Kƶnig Fixes: 738a1cfec2ed ("pwm: Add i.MX TPM PWM driver support") Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/pwm-imx-tpm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c index fcdf6befb838..871527b78aa4 100644 --- a/drivers/pwm/pwm-imx-tpm.c +++ b/drivers/pwm/pwm-imx-tpm.c @@ -403,6 +403,13 @@ static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev) if (tpm->enable_count > 0) return -EBUSY; + /* + * Force 'real_period' to be zero to force period update code + * can be executed after system resume back, since suspend causes + * the period related registers to become their reset values. + */ + tpm->real_period = 0; + clk_disable_unprepare(tpm->clk); return 0; From aa70e5dd72683ea4acc66dfd4ae0e554930d637c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 12 May 2023 18:47:36 +0200 Subject: [PATCH 257/509] pwm: sysfs: Do not apply state to already disabled PWMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 38ba83598633373f47951384cfc389181c8d1bed ] If the PWM is exported but not enabled, do not call pwm_class_apply_state(). First of all, in this case, period may still be unconfigured and this would make pwm_class_apply_state() return -EINVAL, and then suspend would fail. Second, it makes little sense to apply state onto PWM that is not enabled before suspend. Failing case: " $ echo 1 > /sys/class/pwm/pwmchip4/export $ echo mem > /sys/power/state ... pwm pwmchip4: PM: dpm_run_callback(): pwm_class_suspend+0x1/0xa8 returns -22 pwm pwmchip4: PM: failed to suspend: error -22 PM: Some devices failed to suspend, or early wake event detected " Working case: " $ echo 1 > /sys/class/pwm/pwmchip4/export $ echo 100 > /sys/class/pwm/pwmchip4/pwm1/period $ echo 10 > /sys/class/pwm/pwmchip4/pwm1/duty_cycle $ echo mem > /sys/power/state ... " Do not call pwm_class_apply_state() in case the PWM is disabled to fix this issue. Fixes: 7fd4edc57bbae ("pwm: sysfs: Add suspend/resume support") Signed-off-by: Marek Vasut Fixes: ef2bf4997f7d ("pwm: Improve args checking in pwm_apply_state()") Reviewed-by: Brian Norris Reviewed-by: Uwe Kleine-Kƶnig Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/pwm/sysfs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c index 9903c3a7eced..b8417a8d2ef9 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -424,6 +424,13 @@ static int pwm_class_resume_npwm(struct device *parent, unsigned int npwm) if (!export) continue; + /* If pwmchip was not enabled before suspend, do nothing. */ + if (!export->suspend.enabled) { + /* release lock taken in pwm_class_get_state */ + mutex_unlock(&export->lock); + continue; + } + state.enabled = export->suspend.enabled; ret = pwm_class_apply_state(export, pwm, &state); if (ret < 0) @@ -448,7 +455,17 @@ static int __maybe_unused pwm_class_suspend(struct device *parent) if (!export) continue; + /* + * If pwmchip was not enabled before suspend, save + * state for resume time and do nothing else. + */ export->suspend = state; + if (!state.enabled) { + /* release lock taken in pwm_class_get_state */ + mutex_unlock(&export->lock); + continue; + } + state.enabled = false; ret = pwm_class_apply_state(export, pwm, &state); if (ret < 0) { From f733a7bfe8f8aec45b9442c54f1ab293fc815d37 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 8 Jun 2023 21:11:42 +0200 Subject: [PATCH 258/509] rtc: st-lpc: Release some resources in st_rtc_probe() in case of error [ Upstream commit 06c6e1b01d9261f03629cefd1f3553503291e6cf ] If an error occurs after clk_get(), the corresponding resources should be released. Use devm_clk_get() to fix it. Fixes: b5b2bdfc2893 ("rtc: st: Add new driver for ST's LPC RTC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/866af6adbc7454a7b4505eb6c28fbdc86ccff39e.1686251455.git.christophe.jaillet@wanadoo.fr Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/rtc/rtc-st-lpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c index 7d53f7e2febc..c4ea3f3f0884 100644 --- a/drivers/rtc/rtc-st-lpc.c +++ b/drivers/rtc/rtc-st-lpc.c @@ -228,7 +228,7 @@ static int st_rtc_probe(struct platform_device *pdev) enable_irq_wake(rtc->irq); disable_irq(rtc->irq); - rtc->clk = clk_get(&pdev->dev, NULL); + rtc->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(rtc->clk)) { dev_err(&pdev->dev, "Unable to request clock\n"); return PTR_ERR(rtc->clk); From 620993d5ee5bc1833d7995d658c9ad63527bef4b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 Jun 2023 04:54:35 +0200 Subject: [PATCH 259/509] media: cec: i2c: ch7322: also select REGMAP [ Upstream commit 29f96ac23648b2259f42d40703c47dd18fd172ca ] Selecting only REGMAP_I2C can leave REGMAP unset, causing build errors, so also select REGMAP to prevent the build errors. ../drivers/media/cec/i2c/ch7322.c:158:21: error: variable 'ch7322_regmap' has initializer but incomplete type 158 | static const struct regmap_config ch7322_regmap = { ../drivers/media/cec/i2c/ch7322.c:159:10: error: 'const struct regmap_config' has no member named 'reg_bits' 159 | .reg_bits = 8, ../drivers/media/cec/i2c/ch7322.c:159:21: warning: excess elements in struct initializer 159 | .reg_bits = 8, ../drivers/media/cec/i2c/ch7322.c:160:10: error: 'const struct regmap_config' has no member named 'val_bits' 160 | .val_bits = 8, ../drivers/media/cec/i2c/ch7322.c:160:21: warning: excess elements in struct initializer 160 | .val_bits = 8, ../drivers/media/cec/i2c/ch7322.c:161:10: error: 'const struct regmap_config' has no member named 'max_register' 161 | .max_register = 0x7f, ../drivers/media/cec/i2c/ch7322.c:161:25: warning: excess elements in struct initializer 161 | .max_register = 0x7f, ../drivers/media/cec/i2c/ch7322.c:162:10: error: 'const struct regmap_config' has no member named 'disable_locking' 162 | .disable_locking = true, ../drivers/media/cec/i2c/ch7322.c:162:28: warning: excess elements in struct initializer 162 | .disable_locking = true, ../drivers/media/cec/i2c/ch7322.c: In function 'ch7322_probe': ../drivers/media/cec/i2c/ch7322.c:468:26: error: implicit declaration of function 'devm_regmap_init_i2c' [-Werror=implicit-function-declaration] 468 | ch7322->regmap = devm_regmap_init_i2c(client, &ch7322_regmap); ../drivers/media/cec/i2c/ch7322.c:468:24: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 468 | ch7322->regmap = devm_regmap_init_i2c(client, &ch7322_regmap); ../drivers/media/cec/i2c/ch7322.c: At top level: ../drivers/media/cec/i2c/ch7322.c:158:35: error: storage size of 'ch7322_regmap' isn't known 158 | static const struct regmap_config ch7322_regmap = { Link: https://lore.kernel.org/linux-media/20230608025435.29249-1-rdunlap@infradead.org Fixes: 21b9a47e0ec7 ("media: cec: i2c: ch7322: Add ch7322 CEC controller driver") Signed-off-by: Randy Dunlap Cc: Jeff Chase Cc: Hans Verkuil Cc: Joe Tessler Cc: Arnd Bergmann Cc: Mark Brown Cc: Masahiro Yamada Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/cec/i2c/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/cec/i2c/Kconfig b/drivers/media/cec/i2c/Kconfig index 70432a1d6918..d912d143fb31 100644 --- a/drivers/media/cec/i2c/Kconfig +++ b/drivers/media/cec/i2c/Kconfig @@ -5,6 +5,7 @@ config CEC_CH7322 tristate "Chrontel CH7322 CEC controller" depends on I2C + select REGMAP select REGMAP_I2C select CEC_CORE help From 6d2243ab783bf79d1d674ff0ca26229233c56508 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Tue, 27 Jun 2023 12:03:40 +0000 Subject: [PATCH 260/509] sctp: fix potential deadlock on &net->sctp.addr_wq_lock [ Upstream commit 6feb37b3b06e9049e20dcf7e23998f92c9c5be9a ] As &net->sctp.addr_wq_lock is also acquired by the timer sctp_addr_wq_timeout_handler() in protocal.c, the same lock acquisition at sctp_auto_asconf_init() seems should disable irq since it is called from sctp_accept() under process context. Possible deadlock scenario: sctp_accept() -> sctp_sock_migrate() -> sctp_auto_asconf_init() -> spin_lock(&net->sctp.addr_wq_lock) -> sctp_addr_wq_timeout_handler() -> spin_lock_bh(&net->sctp.addr_wq_lock); (deadlock here) This flaw was found using an experimental static analysis tool we are developing for irq-related deadlock. The tentative patch fix the potential deadlock by spin_lock_bh(). Signed-off-by: Chengfeng Ye Fixes: 34e5b0118685 ("sctp: delay auto_asconf init until binding the first addr") Acked-by: Xin Long Link: https://lore.kernel.org/r/20230627120340.19432-1-dg573847474@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sctp/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4a7f811abae4..534364bb871a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -362,9 +362,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp) struct net *net = sock_net(&sp->inet.sk); if (net->sctp.default_auto_asconf) { - spin_lock(&net->sctp.addr_wq_lock); + spin_lock_bh(&net->sctp.addr_wq_lock); list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist); - spin_unlock(&net->sctp.addr_wq_lock); + spin_unlock_bh(&net->sctp.addr_wq_lock); sp->do_auto_asconf = 1; } } From c377451012cec94f01b6c698409fec865e55dd3f Mon Sep 17 00:00:00 2001 From: Tobias Heider Date: Wed, 28 Jun 2023 02:13:32 +0200 Subject: [PATCH 261/509] Add MODULE_FIRMWARE() for FIRMWARE_TG357766. [ Upstream commit 046f753da6143ee16452966915087ec8b0de3c70 ] Fixes a bug where on the M1 mac mini initramfs-tools fails to include the necessary firmware into the initrd. Fixes: c4dab50697ff ("tg3: Download 57766 EEE service patch firmware") Signed-off-by: Tobias Heider Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/ZJt7LKzjdz8+dClx@tobhe.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/tg3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 613ca6124e3c..d14f37be1eb3 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -224,6 +224,7 @@ MODULE_AUTHOR("David S. Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_TG3); +MODULE_FIRMWARE(FIRMWARE_TG357766); MODULE_FIRMWARE(FIRMWARE_TG3TSO); MODULE_FIRMWARE(FIRMWARE_TG3TSO5); From 1f3643f9cfca51c2211fd82cf7c403fb84c27095 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Wed, 28 Jun 2023 21:43:27 +0200 Subject: [PATCH 262/509] net: dsa: vsc73xx: fix MTU configuration [ Upstream commit 3cf62c8177adb0db9e15c8b898c44f997acf3ebf ] Switch in MAXLEN register stores the maximum size of a data frame. The MTU size is 18 bytes smaller than the frame size. The current settings are causing problems with packet forwarding. This patch fixes the MTU settings to proper values. Fixes: fb77ffc6ec86 ("net: dsa: vsc73xx: make the MTU configurable") Reviewed-by: Linus Walleij Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230628194327.1765644-1-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/dsa/vitesse-vsc73xx-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 19ce4aa0973b..80eadf509c0a 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -1025,17 +1025,17 @@ static int vsc73xx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) struct vsc73xx *vsc = ds->priv; return vsc73xx_write(vsc, VSC73XX_BLOCK_MAC, port, - VSC73XX_MAXLEN, new_mtu); + VSC73XX_MAXLEN, new_mtu + ETH_HLEN + ETH_FCS_LEN); } /* According to application not "VSC7398 Jumbo Frames" setting - * up the MTU to 9.6 KB does not affect the performance on standard + * up the frame size to 9.6 KB does not affect the performance on standard * frames. It is clear from the application note that * "9.6 kilobytes" == 9600 bytes. */ static int vsc73xx_get_max_mtu(struct dsa_switch *ds, int port) { - return 9600; + return 9600 - ETH_HLEN - ETH_FCS_LEN; } static const struct dsa_switch_ops vsc73xx_ds_ops = { From 32b9c8f7892c19f7f5c9fed5fb410b9fd5990bb6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 29 Jun 2023 15:43:05 +0200 Subject: [PATCH 263/509] spi: bcm-qspi: return error if neither hif_mspi nor mspi is available [ Upstream commit 7c1f23ad34fcdace50275a6aa1e1969b41c6233f ] If neither a "hif_mspi" nor "mspi" resource is present, the driver will just early exit in probe but still return success. Apart from not doing anything meaningful, this would then also lead to a null pointer access on removal, as platform_get_drvdata() would return NULL, which it would then try to dereference when trying to unregister the spi master. Fix this by unconditionally calling devm_ioremap_resource(), as it can handle a NULL res and will then return a viable ERR_PTR() if we get one. The "return 0;" was previously a "goto qspi_resource_err;" where then ret was returned, but since ret was still initialized to 0 at this place this was a valid conversion in 63c5395bb7a9 ("spi: bcm-qspi: Fix use-after-free on unbind"). The issue was not introduced by this commit, only made more obvious. Fixes: fa236a7ef240 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Jonas Gorski Reviewed-by: Kamal Dasu Link: https://lore.kernel.org/r/20230629134306.95823-1-jonas.gorski@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm-qspi.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 766b00350e39..2c734ea0784b 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1369,13 +1369,9 @@ int bcm_qspi_probe(struct platform_device *pdev, res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "mspi"); - if (res) { - qspi->base[MSPI] = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->base[MSPI])) - return PTR_ERR(qspi->base[MSPI]); - } else { - return 0; - } + qspi->base[MSPI] = devm_ioremap_resource(dev, res); + if (IS_ERR(qspi->base[MSPI])) + return PTR_ERR(qspi->base[MSPI]); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi"); if (res) { From 358145cc379707d3b73d21cd8f3b34db953450c9 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 20 Jun 2023 20:00:22 -0500 Subject: [PATCH 264/509] mailbox: ti-msgmgr: Fill non-message tx data fields with 0x0 [ Upstream commit 1b712f18c461bd75f018033a15cf381e712806b5 ] Sec proxy/message manager data buffer is 60 bytes with the last of the registers indicating transmission completion. This however poses a bit of a challenge. The backing memory for sec_proxy / message manager is regular memory, and all sec proxy does is to trigger a burst of all 60 bytes of data over to the target thread backing ring accelerator. It doesn't do a memory scrub when it moves data out in the burst. When we transmit multiple messages, remnants of previous message is also transmitted which results in some random data being set in TISCI fields of messages that have been expanded forward. The entire concept of backward compatibility hinges on the fact that the unused message fields remain 0x0 allowing for 0x0 value to be specially considered when backward compatibility of message extension is done. So, instead of just writing the completion register, we continue to fill the message buffer up with 0x0 (note: for partial message involving completion, we already do this). This allows us to scale and introduce ABI changes back also work with other boot stages that may have left data in the internal memory. While at this, be consistent and explicit with the data_reg pointer increment. Fixes: aace66b170ce ("mailbox: Introduce TI message manager driver") Signed-off-by: Nishanth Menon Signed-off-by: Jassi Brar Signed-off-by: Sasha Levin --- drivers/mailbox/ti-msgmgr.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/mailbox/ti-msgmgr.c b/drivers/mailbox/ti-msgmgr.c index 0130628f4d9d..535fe73ce310 100644 --- a/drivers/mailbox/ti-msgmgr.c +++ b/drivers/mailbox/ti-msgmgr.c @@ -385,14 +385,20 @@ static int ti_msgmgr_send_data(struct mbox_chan *chan, void *data) /* Ensure all unused data is 0 */ data_trail &= 0xFFFFFFFF >> (8 * (sizeof(u32) - trail_bytes)); writel(data_trail, data_reg); - data_reg++; + data_reg += sizeof(u32); } + /* * 'data_reg' indicates next register to write. If we did not already * write on tx complete reg(last reg), we must do so for transmit + * In addition, we also need to make sure all intermediate data + * registers(if any required), are reset to 0 for TISCI backward + * compatibility to be maintained. */ - if (data_reg <= qinst->queue_buff_end) - writel(0, qinst->queue_buff_end); + while (data_reg <= qinst->queue_buff_end) { + writel(0, data_reg); + data_reg += sizeof(u32); + } return 0; } From 5ded9e8aa53e88f3aa81948c2622ba5e14aaef3d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Jun 2023 09:41:02 +0800 Subject: [PATCH 265/509] f2fs: fix error path handling in truncate_dnode() [ Upstream commit 0135c482fa97e2fd8245cb462784112a00ed1211 ] If truncate_node() fails in truncate_dnode(), it missed to call f2fs_put_page(), fix it. Fixes: 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c63274d4b74b..02cb1c806c3e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -884,8 +884,10 @@ static int truncate_dnode(struct dnode_of_data *dn) dn->ofs_in_node = 0; f2fs_truncate_data_blocks(dn); err = truncate_node(dn); - if (err) + if (err) { + f2fs_put_page(page, 1); return err; + } return 1; } From f970b05c9b76b820b2e87eea39f8b2050086f03a Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:43 +0530 Subject: [PATCH 266/509] octeontx2-af: Fix mapping for NIX block from CGX connection [ Upstream commit 2e7bc57b976bb016c6569a54d95c1b8d88f9450a ] Firmware configures NIX block mapping for all MAC blocks. The current implementation reads the configuration and creates the mapping between RVU PF and NIX blocks. But this configuration is only valid for silicons that support multiple blocks. For all other silicons, all MAC blocks map to NIX0. This patch corrects the mapping by adding a check for the same. Fixes: c5a73b632b90 ("octeontx2-af: Map NIX block from CGX connection") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 11 +++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index fc6d785b98dd..ec9a291e866c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -20,6 +20,7 @@ #define PCI_DEVID_OCTEONTX2_RVU_AF 0xA065 /* Subsystem Device ID */ +#define PCI_SUBSYS_DEVID_98XX 0xB100 #define PCI_SUBSYS_DEVID_96XX 0xB200 /* PCI BAR nos */ @@ -403,6 +404,16 @@ static inline bool is_rvu_96xx_B0(struct rvu *rvu) (pdev->subsystem_device == PCI_SUBSYS_DEVID_96XX); } +static inline bool is_rvu_supports_nix1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_98XX) + return true; + + return false; +} + /* Function Prototypes * RVU */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 6c6b411e78fd..83743e15326d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -84,7 +84,7 @@ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, p2x = cgx_lmac_get_p2x(cgx_id, lmac_id); /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */ pfvf->nix_blkaddr = BLKADDR_NIX0; - if (p2x == CMR_P2X_SEL_NIX1) + if (is_rvu_supports_nix1(rvu) && p2x == CMR_P2X_SEL_NIX1) pfvf->nix_blkaddr = BLKADDR_NIX1; } From 9a9d468fdcca6a75f7bdf405dff7a46f53647d64 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 30 Jun 2023 22:47:12 -0700 Subject: [PATCH 267/509] powerpc: allow PPC_EARLY_DEBUG_CPM only when SERIAL_CPM=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 39f49684036d24af800ff194c33c7b2653c591d7 ] In a randconfig with CONFIG_SERIAL_CPM=m and CONFIG_PPC_EARLY_DEBUG_CPM=y, there is a build error: ERROR: modpost: "udbg_putc" [drivers/tty/serial/cpm_uart/cpm_uart.ko] undefined! Prevent the build error by allowing PPC_EARLY_DEBUG_CPM only when SERIAL_CPM=y. Fixes: c374e00e17f1 ("[POWERPC] Add early debug console for CPM serial ports.") Signed-off-by: Randy Dunlap Reviewed-by: Pali RohĆ”r Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230701054714.30512-1-rdunlap@infradead.org Signed-off-by: Sasha Levin --- arch/powerpc/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 52abca88b5b2..e03fb9154420 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -234,7 +234,7 @@ config PPC_EARLY_DEBUG_40x config PPC_EARLY_DEBUG_CPM bool "Early serial debugging for Freescale CPM-based serial ports" - depends on SERIAL_CPM + depends on SERIAL_CPM=y help Select this to enable early debugging for Freescale chips using a CPM-based serial port. This assumes that the bootwrapper From 73f512bedfd47b5f3b515c6269ea650a55a6f579 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 30 Jun 2023 19:41:18 +0300 Subject: [PATCH 268/509] net: bridge: keep ports without IFF_UNICAST_FLT in BR_PROMISC mode [ Upstream commit 6ca3c005d0604e8d2b439366e3923ea58db99641 ] According to the synchronization rules for .ndo_get_stats() as seen in Documentation/networking/netdevices.rst, acquiring a plain spin_lock() should not be illegal, but the bridge driver implementation makes it so. After running these commands, I am being faced with the following lockdep splat: $ ip link add link swp0 name macsec0 type macsec encrypt on && ip link set swp0 up $ ip link add dev br0 type bridge vlan_filtering 1 && ip link set br0 up $ ip link set macsec0 master br0 && ip link set macsec0 up ======================================================== WARNING: possible irq lock inversion dependency detected 6.4.0-04295-g31b577b4bd4a #603 Not tainted -------------------------------------------------------- swapper/1/0 just changed the state of lock: ffff6bd348724cd8 (&br->lock){+.-.}-{3:3}, at: br_forward_delay_timer_expired+0x34/0x198 but this lock took another, SOFTIRQ-unsafe lock in the past: (&ocelot->stats_lock){+.+.}-{3:3} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Chain exists of: &br->lock --> &br->hash_lock --> &ocelot->stats_lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ocelot->stats_lock); local_irq_disable(); lock(&br->lock); lock(&br->hash_lock); lock(&br->lock); *** DEADLOCK *** (details about the 3 locks skipped) swp0 is instantiated by drivers/net/dsa/ocelot/felix.c, and this only matters to the extent that its .ndo_get_stats64() method calls spin_lock(&ocelot->stats_lock). Documentation/locking/lockdep-design.rst says: | A lock is irq-safe means it was ever used in an irq context, while a lock | is irq-unsafe means it was ever acquired with irq enabled. (...) | Furthermore, the following usage based lock dependencies are not allowed | between any two lock-classes:: | | -> | -> Lockdep marks br->hash_lock as softirq-safe, because it is sometimes taken in softirq context (for example br_fdb_update() which runs in NET_RX softirq), and when it's not in softirq context it blocks softirqs by using spin_lock_bh(). Lockdep marks ocelot->stats_lock as softirq-unsafe, because it never blocks softirqs from running, and it is never taken from softirq context. So it can always be interrupted by softirqs. There is a call path through which a function that holds br->hash_lock: fdb_add_hw_addr() will call a function that acquires ocelot->stats_lock: ocelot_port_get_stats64(). This can be seen below: ocelot_port_get_stats64+0x3c/0x1e0 felix_get_stats64+0x20/0x38 dsa_slave_get_stats64+0x3c/0x60 dev_get_stats+0x74/0x2c8 rtnl_fill_stats+0x4c/0x150 rtnl_fill_ifinfo+0x5cc/0x7b8 rtmsg_ifinfo_build_skb+0xe4/0x150 rtmsg_ifinfo+0x5c/0xb0 __dev_notify_flags+0x58/0x200 __dev_set_promiscuity+0xa0/0x1f8 dev_set_promiscuity+0x30/0x70 macsec_dev_change_rx_flags+0x68/0x88 __dev_set_promiscuity+0x1a8/0x1f8 __dev_set_rx_mode+0x74/0xa8 dev_uc_add+0x74/0xa0 fdb_add_hw_addr+0x68/0xd8 fdb_add_local+0xc4/0x110 br_fdb_add_local+0x54/0x88 br_add_if+0x338/0x4a0 br_add_slave+0x20/0x38 do_setlink+0x3a4/0xcb8 rtnl_newlink+0x758/0x9d0 rtnetlink_rcv_msg+0x2f0/0x550 netlink_rcv_skb+0x128/0x148 rtnetlink_rcv+0x24/0x38 the plain English explanation for it is: The macsec0 bridge port is created without p->flags & BR_PROMISC, because it is what br_manage_promisc() decides for a VLAN filtering bridge with a single auto port. As part of the br_add_if() procedure, br_fdb_add_local() is called for the MAC address of the device, and this results in a call to dev_uc_add() for macsec0 while the softirq-safe br->hash_lock is taken. Because macsec0 does not have IFF_UNICAST_FLT, dev_uc_add() ends up calling __dev_set_promiscuity() for macsec0, which is propagated by its implementation, macsec_dev_change_rx_flags(), to the lower device: swp0. This triggers the call path: dev_set_promiscuity(swp0) -> rtmsg_ifinfo() -> dev_get_stats() -> ocelot_port_get_stats64() with a calling context that lockdep doesn't like (br->hash_lock held). Normally we don't see this, because even though many drivers that can be bridge ports don't support IFF_UNICAST_FLT, we need a driver that (a) doesn't support IFF_UNICAST_FLT, *and* (b) it forwards the IFF_PROMISC flag to another driver, and (c) *that* driver implements ndo_get_stats64() using a softirq-unsafe spinlock. Condition (b) is necessary because the first __dev_set_rx_mode() calls __dev_set_promiscuity() with "bool notify=false", and thus, the rtmsg_ifinfo() code path won't be entered. The same criteria also hold true for DSA switches which don't report IFF_UNICAST_FLT. When the DSA master uses a spin_lock() in its ndo_get_stats64() method, the same lockdep splat can be seen. I think the deadlock possibility is real, even though I didn't reproduce it, and I'm thinking of the following situation to support that claim: fdb_add_hw_addr() runs on a CPU A, in a context with softirqs locally disabled and br->hash_lock held, and may end up attempting to acquire ocelot->stats_lock. In parallel, ocelot->stats_lock is currently held by a thread B (say, ocelot_check_stats_work()), which is interrupted while holding it by a softirq which attempts to lock br->hash_lock. Thread B cannot make progress because br->hash_lock is held by A. Whereas thread A cannot make progress because ocelot->stats_lock is held by B. When taking the issue at face value, the bridge can avoid that problem by simply making the ports promiscuous from a code path with a saner calling context (br->hash_lock not held). A bridge port without IFF_UNICAST_FLT is going to become promiscuous as soon as we call dev_uc_add() on it (which we do unconditionally), so why not be preemptive and make it promiscuous right from the beginning, so as to not be taken by surprise. With this, we've broken the links between code that holds br->hash_lock or br->lock and code that calls into the ndo_change_rx_flags() or ndo_get_stats64() ops of the bridge port. Fixes: 2796d0c648c9 ("bridge: Automatically manage port promiscuous mode.") Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_if.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 1d87bf51f384..e35488fde9c8 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -157,8 +157,9 @@ void br_manage_promisc(struct net_bridge *br) * This lets us disable promiscuous mode and write * this config to hw. */ - if (br->auto_cnt == 0 || - (br->auto_cnt == 1 && br_auto_port(p))) + if ((p->dev->priv_flags & IFF_UNICAST_FLT) && + (br->auto_cnt == 0 || + (br->auto_cnt == 1 && br_auto_port(p)))) br_port_clear_promisc(p); else br_port_set_promisc(p); From b785ba0acc82f894c05ec5ceb0c1c4720972aa8e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jun 2023 16:41:50 +0000 Subject: [PATCH 269/509] tcp: annotate data races in __tcp_oow_rate_limited() [ Upstream commit 998127cdb4699b9d470a9348ffe9f1154346be5f ] request sockets are lockless, __tcp_oow_rate_limited() could be called on the same object from different cpus. This is harmless. Add READ_ONCE()/WRITE_ONCE() annotations to avoid a KCSAN report. Fixes: 4ce7e93cb3fe ("tcp: rate limit ACK sent by SYN_RECV request sockets") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b98b7920c402..d6dfbb88dcf5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3560,8 +3560,11 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, u32 *last_oow_ack_time) { - if (*last_oow_ack_time) { - s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + /* Paired with the WRITE_ONCE() in this function. */ + u32 val = READ_ONCE(*last_oow_ack_time); + + if (val) { + s32 elapsed = (s32)(tcp_jiffies32 - val); if (0 <= elapsed && elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { @@ -3570,7 +3573,10 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, } } - *last_oow_ack_time = tcp_jiffies32; + /* Paired with the prior READ_ONCE() and with itself, + * as we might be lockless. + */ + WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32); return false; /* not rate-limited: go ahead, send dupack now! */ } From 2434a6715f599958f233e76e8b37a5b9dd5ba439 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 3 Jul 2023 19:53:29 +0200 Subject: [PATCH 270/509] xsk: Honor SO_BINDTODEVICE on bind [ Upstream commit f7306acec9aae9893d15e745c8791124d42ab10a ] Initial creation of an AF_XDP socket requires CAP_NET_RAW capability. A privileged process might create the socket and pass it to a non-privileged process for later use. However, that process will be able to bind the socket to any network interface. Even though it will not be able to receive any traffic without modification of the BPF map, the situation is not ideal. Sockets already have a mechanism that can be used to restrict what interface they can be attached to. That is SO_BINDTODEVICE. To change the SO_BINDTODEVICE binding the process will need CAP_NET_RAW. Make xsk_bind() honor the SO_BINDTODEVICE in order to allow safer workflow when non-privileged process is using AF_XDP. The intended workflow is following: 1. First process creates a bare socket with socket(AF_XDP, ...). 2. First process loads the XSK program to the interface. 3. First process adds the socket fd to a BPF map. 4. First process ties socket fd to a particular interface using SO_BINDTODEVICE. 5. First process sends socket fd to a second process. 6. Second process allocates UMEM. 7. Second process binds socket to the interface with bind(...). 8. Second process sends/receives the traffic. All the steps above are possible today if the first process is privileged and the second one has sufficient RLIMIT_MEMLOCK and no capabilities. However, the second process will be able to bind the socket to any interface it wants on step 7 and send traffic from it. With the proposed change, the second process will be able to bind the socket only to a specific interface chosen by the first process at step 4. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Ilya Maximets Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Acked-by: John Fastabend Acked-by: Jason Wang Link: https://lore.kernel.org/bpf/20230703175329.3259672-1-i.maximets@ovn.org Signed-off-by: Sasha Levin --- Documentation/networking/af_xdp.rst | 9 +++++++++ net/xdp/xsk.c | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 2ccc5644cc98..70623cb135d3 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -433,6 +433,15 @@ start N bytes into the buffer leaving the first N bytes for the application to use. The final option is the flags field, but it will be dealt with in separate sections for each UMEM flag. +SO_BINDTODEVICE setsockopt +-------------------------- + +This is a generic SOL_SOCKET option that can be used to tie AF_XDP +socket to a particular network interface. It is useful when a socket +is created by a privileged process and passed to a non-privileged one. +Once the option is set, kernel will refuse attempts to bind that socket +to a different interface. Updating the value requires CAP_NET_RAW. + XDP_STATISTICS getsockopt ------------------------- diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 691841dc6d33..d04f91f4d09d 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -667,6 +667,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev; + int bound_dev_if; u32 flags, qid; int err = 0; @@ -680,6 +681,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) XDP_USE_NEED_WAKEUP)) return -EINVAL; + bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + if (bound_dev_if && bound_dev_if != sxdp->sxdp_ifindex) + return -EINVAL; + rtnl_lock(); mutex_lock(&xs->mutex); if (xs->state != XSK_READY) { From 0b08ff091f3131a6a35525173d857c5158c08cdc Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 3 Jul 2023 19:08:42 +0800 Subject: [PATCH 271/509] net/sched: act_pedit: Add size check for TCA_PEDIT_PARMS_EX [ Upstream commit 30c45b5361d39b4b793780ffac5538090b9e2eb1 ] The attribute TCA_PEDIT_PARMS_EX is not be included in pedit_policy and one malicious user could fake a TCA_PEDIT_PARMS_EX whose length is smaller than the intended sizeof(struct tc_pedit). Hence, the dereference in tcf_pedit_init() could access dirty heap data. static int tcf_pedit_init(...) { // ... pattr = tb[TCA_PEDIT_PARMS]; // TCA_PEDIT_PARMS is included if (!pattr) pattr = tb[TCA_PEDIT_PARMS_EX]; // but this is not // ... parm = nla_data(pattr); index = parm->index; // parm is able to be smaller than 4 bytes // and this dereference gets dirty skb_buff // data created in netlink_sendmsg } This commit adds TCA_PEDIT_PARMS_EX length in pedit_policy which avoid the above case, just like the TCA_PEDIT_PARMS. Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers") Signed-off-by: Lin Ma Reviewed-by: Pedro Tammela Link: https://lore.kernel.org/r/20230703110842.590282-1-linma@zju.edu.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/act_pedit.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index db0d3bff19eb..a44101b2f441 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -26,6 +26,7 @@ static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { [TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) }, + [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) }, [TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED }, }; From 2758fb81bbc9ad01dab30dd051b7ed936de34cca Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 3 Jul 2023 19:14:46 +0200 Subject: [PATCH 272/509] pptp: Fix fib lookup calls. [ Upstream commit 84bef5b6037c15180ef88ac4216dc621d16df1a6 ] PPTP uses pppox sockets (struct pppox_sock). These sockets don't embed an inet_sock structure, so it's invalid to call inet_sk() on them. Therefore, the ip_route_output_ports() call in pptp_connect() has two problems: * The tos variable is set with RT_CONN_FLAGS(sk), which calls inet_sk() on the pppox socket. * ip_route_output_ports() tries to retrieve routing flags using inet_sk_flowi_flags(), which is also going to call inet_sk() on the pppox socket. While PPTP doesn't use inet sockets, it's actually really layered on top of IP and therefore needs a proper way to do fib lookups. So let's define pptp_route_output() to get a struct rtable from a pptp socket. Let's also replace the ip_route_output_ports() call of pptp_xmit() for consistency. In practice, this means that: * pptp_connect() sets ->flowi4_tos and ->flowi4_flags to zero instead of using bits of unrelated struct pppox_sock fields. * pptp_xmit() now respects ->sk_mark and ->sk_uid. * pptp_xmit() now calls the security_sk_classify_flow() security hook, thus allowing to set ->flowic_secid. * pptp_xmit() now passes the pppox socket to xfrm_lookup_route(). Found by code inspection. Fixes: 00959ade36ac ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ppp/pptp.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index ee5058445d06..05a75b5a8b68 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,23 @@ static void del_chan(struct pppox_sock *sock) spin_unlock(&chan_lock); } +static struct rtable *pptp_route_output(struct pppox_sock *po, + struct flowi4 *fl4) +{ + struct sock *sk = &po->sk; + struct net *net; + + net = sock_net(sk); + flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 0, + RT_SCOPE_UNIVERSE, IPPROTO_GRE, 0, + po->proto.pptp.dst_addr.sin_addr.s_addr, + po->proto.pptp.src_addr.sin_addr.s_addr, + 0, 0, sock_net_uid(net, sk)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); + + return ip_route_output_flow(net, fl4, sk); +} + static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = (struct sock *) chan->private; @@ -151,11 +169,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (sk_pppox(po)->sk_state & PPPOX_DEAD) goto tx_error; - rt = ip_route_output_ports(net, &fl4, NULL, - opt->dst_addr.sin_addr.s_addr, - opt->src_addr.sin_addr.s_addr, - 0, 0, IPPROTO_GRE, - RT_TOS(0), sk->sk_bound_dev_if); + rt = pptp_route_output(po, &fl4); if (IS_ERR(rt)) goto tx_error; @@ -440,12 +454,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.private = sk; po->chan.ops = &pptp_chan_ops; - rt = ip_route_output_ports(sock_net(sk), &fl4, sk, - opt->dst_addr.sin_addr.s_addr, - opt->src_addr.sin_addr.s_addr, - 0, 0, - IPPROTO_GRE, RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if); + rt = pptp_route_output(po, &fl4); if (IS_ERR(rt)) { error = -EHOSTUNREACH; goto end; From 9f5548e4214d260ed0c9ec0394d7b4bae4015029 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 4 Jul 2023 01:05:44 +0300 Subject: [PATCH 273/509] net: dsa: tag_sja1105: fix MAC DA patching from meta frames [ Upstream commit 1dcf6efd5f0c1f4496b3ef7ec5a7db104a53b38c ] The SJA1105 manual says that at offset 4 into the meta frame payload we have "MAC destination byte 2" and at offset 5 we have "MAC destination byte 1". These are counted from the LSB, so byte 1 is h_dest[ETH_HLEN-2] aka h_dest[4] and byte 2 is h_dest[ETH_HLEN-3] aka h_dest[3]. The sja1105_meta_unpack() function decodes these the other way around, so a frame with MAC DA 01:80:c2:11:22:33 is received by the network stack as having 01:80:c2:22:11:33. Fixes: e53e18a6fe4d ("net: dsa: sja1105: Receive and decode meta frames") Signed-off-by: Vladimir Oltean Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/dsa/tag_sja1105.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 50496013cdb7..07876160edd2 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -48,8 +48,8 @@ static void sja1105_meta_unpack(const struct sk_buff *skb, * a unified unpacking command for both device series. */ packing(buf, &meta->tstamp, 31, 0, 4, UNPACK, 0); - packing(buf + 4, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0); - packing(buf + 5, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0); + packing(buf + 4, &meta->dmac_byte_3, 7, 0, 1, UNPACK, 0); + packing(buf + 5, &meta->dmac_byte_4, 7, 0, 1, UNPACK, 0); packing(buf + 6, &meta->source_port, 7, 0, 1, UNPACK, 0); packing(buf + 7, &meta->switch_id, 7, 0, 1, UNPACK, 0); } From 37750131d2a5a95d2b83e43dbfd8738c72c4eb59 Mon Sep 17 00:00:00 2001 From: Thorsten Winkler Date: Tue, 4 Jul 2023 16:41:21 +0200 Subject: [PATCH 274/509] s390/qeth: Fix vipa deletion [ Upstream commit 80de809bd35e2a8999edf9f5aaa2d8de18921f11 ] Change boolean parameter of function "qeth_l3_vipa_store" inside the "qeth_l3_dev_vipa_del4_store" function from "true" to "false" because "true" is used for adding a virtual ip address and "false" for deleting. Fixes: 2390166a6b45 ("s390/qeth: clean up L3 sysfs code") Reviewed-by: Alexandra Winter Reviewed-by: Wenjia Zhang Signed-off-by: Thorsten Winkler Signed-off-by: Alexandra Winter Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/s390/net/qeth_l3_sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 997fbb7006a7..316f8622f3cc 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -652,7 +652,7 @@ static QETH_DEVICE_ATTR(vipa_add4, add4, 0644, static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4); + return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV4); } static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL, From d1c946552af299f4fa85bf7da15e328123771128 Mon Sep 17 00:00:00 2001 From: Artur Rojek Date: Sat, 27 May 2023 18:44:50 +0200 Subject: [PATCH 275/509] sh: dma: Fix DMA channel offset calculation [ Upstream commit e82e47584847129a20b8c9f4a1dcde09374fb0e0 ] Various SoCs of the SH3, SH4 and SH4A family, which use this driver, feature a differing number of DMA channels, which can be distributed between up to two DMAC modules. The existing implementation fails to correctly accommodate for all those variations, resulting in wrong channel offset calculations and leading to kernel panics. Rewrite dma_base_addr() in order to properly calculate channel offsets in a DMAC module. Fix dmaor_read_reg() and dmaor_write_reg(), so that the correct DMAC module base is selected for the DMAOR register. Fixes: 7f47c7189b3e8f19 ("sh: dma: More legacy cpu dma chainsawing.") Signed-off-by: Artur Rojek Reviewed-by: Geert Uytterhoeven Reviewed-by: John Paul Adrian Glaubitz Link: https://lore.kernel.org/r/20230527164452.64797-2-contact@artur-rojek.eu Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Sasha Levin --- arch/sh/drivers/dma/dma-sh.c | 37 +++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/sh/drivers/dma/dma-sh.c b/arch/sh/drivers/dma/dma-sh.c index 96c626c2cd0a..306fba1564e5 100644 --- a/arch/sh/drivers/dma/dma-sh.c +++ b/arch/sh/drivers/dma/dma-sh.c @@ -18,6 +18,18 @@ #include #include +/* + * Some of the SoCs feature two DMAC modules. In such a case, the channels are + * distributed equally among them. + */ +#ifdef SH_DMAC_BASE1 +#define SH_DMAC_NR_MD_CH (CONFIG_NR_ONCHIP_DMA_CHANNELS / 2) +#else +#define SH_DMAC_NR_MD_CH CONFIG_NR_ONCHIP_DMA_CHANNELS +#endif + +#define SH_DMAC_CH_SZ 0x10 + /* * Define the default configuration for dual address memory-memory transfer. * The 0x400 value represents auto-request, external->external. @@ -29,7 +41,7 @@ static unsigned long dma_find_base(unsigned int chan) unsigned long base = SH_DMAC_BASE0; #ifdef SH_DMAC_BASE1 - if (chan >= 6) + if (chan >= SH_DMAC_NR_MD_CH) base = SH_DMAC_BASE1; #endif @@ -40,13 +52,13 @@ static unsigned long dma_base_addr(unsigned int chan) { unsigned long base = dma_find_base(chan); - /* Normalize offset calculation */ - if (chan >= 9) - chan -= 6; - if (chan >= 4) - base += 0x10; + chan = (chan % SH_DMAC_NR_MD_CH) * SH_DMAC_CH_SZ; - return base + (chan * 0x10); + /* DMAOR is placed inside the channel register space. Step over it. */ + if (chan >= DMAOR) + base += SH_DMAC_CH_SZ; + + return base + chan; } #ifdef CONFIG_SH_DMA_IRQ_MULTI @@ -250,12 +262,11 @@ static int sh_dmac_get_dma_residue(struct dma_channel *chan) #define NR_DMAOR 1 #endif -/* - * DMAOR bases are broken out amongst channel groups. DMAOR0 manages - * channels 0 - 5, DMAOR1 6 - 11 (optional). - */ -#define dmaor_read_reg(n) __raw_readw(dma_find_base((n)*6)) -#define dmaor_write_reg(n, data) __raw_writew(data, dma_find_base(n)*6) +#define dmaor_read_reg(n) __raw_readw(dma_find_base((n) * \ + SH_DMAC_NR_MD_CH) + DMAOR) +#define dmaor_write_reg(n, data) __raw_writew(data, \ + dma_find_base((n) * \ + SH_DMAC_NR_MD_CH) + DMAOR) static inline int dmaor_reset(int no) { From 023bd9dc410cd58e3a7357d18990a0ae377ef33f Mon Sep 17 00:00:00 2001 From: Danila Chernetsov Date: Tue, 4 Apr 2023 19:05:49 +0000 Subject: [PATCH 276/509] apparmor: fix missing error check for rhashtable_insert_fast [ Upstream commit 000518bc5aef25d3f703592a0296d578c98b1517 ] rhashtable_insert_fast() could return err value when memory allocation is failed. but unpack_profile() do not check values and this always returns success value. This patch just adds error check code. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: e025be0f26d5 ("apparmor: support querying extended trusted helper extra data") Signed-off-by: Danila Chernetsov Signed-off-by: John Johansen Signed-off-by: Sasha Levin --- security/apparmor/policy_unpack.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 519656e68582..10896d69c442 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -909,8 +909,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - rhashtable_insert_fast(profile->data, &data->head, - profile->data->p); + if (rhashtable_insert_fast(profile->data, &data->head, + profile->data->p)) { + kfree_sensitive(data->key); + kfree_sensitive(data); + info = "failed to insert data to table"; + goto fail; + } } if (!unpack_nameX(e, AA_STRUCTEND, NULL)) { From b6eefa7a27a6f58e84da80a91f50c60e1a1bb171 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 23 Aug 2021 23:41:42 +0200 Subject: [PATCH 277/509] i2c: xiic: Defer xiic_wakeup() and __xiic_start_xfer() in xiic_process() [ Upstream commit 743e227a895923c37a333eb2ebf3e391f00c406d ] The __xiic_start_xfer() manipulates the interrupt flags, xiic_wakeup() may result in return from xiic_xfer() early. Defer both to the end of the xiic_process() interrupt thread, so that they are executed after all the other interrupt bits handling completed and once it completely safe to perform changes to the interrupt bits in the hardware. Signed-off-by: Marek Vasut Acked-by: Michal Simek Signed-off-by: Wolfram Sang Stable-dep-of: cb6e45c9a0ad ("i2c: xiic: Don't try to handle more interrupt events after error") Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-xiic.c | 37 ++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 3b564e68130b..8b93c22f3c40 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -375,6 +375,9 @@ static irqreturn_t xiic_process(int irq, void *dev_id) struct xiic_i2c *i2c = dev_id; u32 pend, isr, ier; u32 clr = 0; + int xfer_more = 0; + int wakeup_req = 0; + int wakeup_code = 0; /* Get the interrupt Status from the IPIF. There is no clearing of * interrupts in the IPIF. Interrupts must be cleared at the source. @@ -411,10 +414,14 @@ static irqreturn_t xiic_process(int irq, void *dev_id) */ xiic_reinit(i2c); - if (i2c->rx_msg) - xiic_wakeup(i2c, STATE_ERROR); - if (i2c->tx_msg) - xiic_wakeup(i2c, STATE_ERROR); + if (i2c->rx_msg) { + wakeup_req = 1; + wakeup_code = STATE_ERROR; + } + if (i2c->tx_msg) { + wakeup_req = 1; + wakeup_code = STATE_ERROR; + } } if (pend & XIIC_INTR_RX_FULL_MASK) { /* Receive register/FIFO is full */ @@ -448,8 +455,7 @@ static irqreturn_t xiic_process(int irq, void *dev_id) i2c->tx_msg++; dev_dbg(i2c->adap.dev.parent, "%s will start next...\n", __func__); - - __xiic_start_xfer(i2c); + xfer_more = 1; } } } @@ -463,11 +469,13 @@ static irqreturn_t xiic_process(int irq, void *dev_id) if (!i2c->tx_msg) goto out; - if ((i2c->nmsgs == 1) && !i2c->rx_msg && - xiic_tx_space(i2c) == 0) - xiic_wakeup(i2c, STATE_DONE); + wakeup_req = 1; + + if (i2c->nmsgs == 1 && !i2c->rx_msg && + xiic_tx_space(i2c) == 0) + wakeup_code = STATE_DONE; else - xiic_wakeup(i2c, STATE_ERROR); + wakeup_code = STATE_ERROR; } if (pend & (XIIC_INTR_TX_EMPTY_MASK | XIIC_INTR_TX_HALF_MASK)) { /* Transmit register/FIFO is empty or Ā½ empty */ @@ -491,7 +499,7 @@ static irqreturn_t xiic_process(int irq, void *dev_id) if (i2c->nmsgs > 1) { i2c->nmsgs--; i2c->tx_msg++; - __xiic_start_xfer(i2c); + xfer_more = 1; } else { xiic_irq_dis(i2c, XIIC_INTR_TX_HALF_MASK); @@ -509,6 +517,13 @@ static irqreturn_t xiic_process(int irq, void *dev_id) dev_dbg(i2c->adap.dev.parent, "%s clr: 0x%x\n", __func__, clr); xiic_setreg32(i2c, XIIC_IISR_OFFSET, clr); + if (xfer_more) + __xiic_start_xfer(i2c); + if (wakeup_req) + xiic_wakeup(i2c, wakeup_code); + + WARN_ON(xfer_more && wakeup_req); + mutex_unlock(&i2c->lock); return IRQ_HANDLED; } From e71714ad24d8c43bb51050c23997d9456f408a00 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 6 Jun 2023 12:25:58 -0600 Subject: [PATCH 278/509] i2c: xiic: Don't try to handle more interrupt events after error [ Upstream commit cb6e45c9a0ad9e0f8664fd06db0227d185dc76ab ] In xiic_process, it is possible that error events such as arbitration lost or TX error can be raised in conjunction with other interrupt flags such as TX FIFO empty or bus not busy. Error events result in the controller being reset and the error returned to the calling request, but the function could potentially try to keep handling the other events, such as by writing more messages into the TX FIFO. Since the transaction has already failed, this is not helpful and will just cause issues. This problem has been present ever since: commit 7f9906bd7f72 ("i2c: xiic: Service all interrupts in isr") which allowed non-error events to be handled after errors, but became more obvious after: commit 743e227a8959 ("i2c: xiic: Defer xiic_wakeup() and __xiic_start_xfer() in xiic_process()") which reworked the code to add a WARN_ON which triggers if both the xfer_more and wakeup_req flags were set, since this combination is not supposed to happen, but was occurring in this scenario. Skip further interrupt handling after error flags are detected to avoid this problem. Fixes: 7f9906bd7f72 ("i2c: xiic: Service all interrupts in isr") Signed-off-by: Robert Hancock Acked-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: Sasha Levin --- drivers/i2c/busses/i2c-xiic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 8b93c22f3c40..568e97c3896d 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -422,6 +422,8 @@ static irqreturn_t xiic_process(int irq, void *dev_id) wakeup_req = 1; wakeup_code = STATE_ERROR; } + /* don't try to handle other events */ + goto out; } if (pend & XIIC_INTR_RX_FULL_MASK) { /* Receive register/FIFO is full */ From e41a8e46156105ee91436d4fe9ceaeaa943b372d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 6 Jul 2023 17:53:57 +0200 Subject: [PATCH 279/509] ALSA: jack: Fix mutex call in snd_jack_report() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 89dbb335cb6a627a4067bc42caa09c8bc3326d40 ] snd_jack_report() is supposed to be callable from an IRQ context, too, and it's indeed used in that way from virtsnd driver. The fix for input_dev race in commit 1b6a6fc5280e ("ALSA: jack: Access input_dev under mutex"), however, introduced a mutex lock in snd_jack_report(), and this resulted in a potential sleep-in-atomic. For addressing that problem, this patch changes the relevant code to use the object get/put and removes the mutex usage. That is, snd_jack_report(), it takes input_get_device() and leaves with input_put_device() for assuring the input_dev being assigned. Although the whole mutex could be reduced, we keep it because it can be still a protection for potential races between creation and deletion. Fixes: 1b6a6fc5280e ("ALSA: jack: Access input_dev under mutex") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/cf95f7fe-a748-4990-8378-000491b40329@moroto.mountain Tested-by: Amadeusz Sławiński Cc: Link: https://lore.kernel.org/r/20230706155357.3470-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/jack.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sound/core/jack.c b/sound/core/jack.c index 45e28db6ea38..8a9baa084191 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -364,6 +364,7 @@ void snd_jack_report(struct snd_jack *jack, int status) { struct snd_jack_kctl *jack_kctl; #ifdef CONFIG_SND_JACK_INPUT_DEV + struct input_dev *idev; int i; #endif @@ -375,30 +376,28 @@ void snd_jack_report(struct snd_jack *jack, int status) status & jack_kctl->mask_bits); #ifdef CONFIG_SND_JACK_INPUT_DEV - mutex_lock(&jack->input_dev_lock); - if (!jack->input_dev) { - mutex_unlock(&jack->input_dev_lock); + idev = input_get_device(jack->input_dev); + if (!idev) return; - } for (i = 0; i < ARRAY_SIZE(jack->key); i++) { int testbit = SND_JACK_BTN_0 >> i; if (jack->type & testbit) - input_report_key(jack->input_dev, jack->key[i], + input_report_key(idev, jack->key[i], status & testbit); } for (i = 0; i < ARRAY_SIZE(jack_switch_types); i++) { int testbit = 1 << i; if (jack->type & testbit) - input_report_switch(jack->input_dev, + input_report_switch(idev, jack_switch_types[i], status & testbit); } - input_sync(jack->input_dev); - mutex_unlock(&jack->input_dev_lock); + input_sync(idev); + input_put_device(idev); #endif /* CONFIG_SND_JACK_INPUT_DEV */ } EXPORT_SYMBOL(snd_jack_report); From 8d36cb6d1aed7d5c9f752f817614a3831e54e11b Mon Sep 17 00:00:00 2001 From: Shuai Jiang Date: Tue, 18 Apr 2023 21:56:12 +0800 Subject: [PATCH 280/509] i2c: qup: Add missing unwind goto in qup_i2c_probe() commit cd9489623c29aa2f8cc07088168afb6e0d5ef06d upstream. Smatch Warns: drivers/i2c/busses/i2c-qup.c:1784 qup_i2c_probe() warn: missing unwind goto? The goto label "fail_runtime" and "fail" will disable qup->pclk, but here qup->pclk failed to obtain, in order to be consistent, change the direct return to goto label "fail_dma". Fixes: 9cedf3b2f099 ("i2c: qup: Add bam dma capabilities") Signed-off-by: Shuai Jiang Reviewed-by: Dongliang Mu Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Cc: # v4.6+ Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-qup.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 5a47915869ae..576c12670bd8 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1752,16 +1752,21 @@ static int qup_i2c_probe(struct platform_device *pdev) if (!clk_freq || clk_freq > I2C_MAX_FAST_MODE_PLUS_FREQ) { dev_err(qup->dev, "clock frequency not supported %d\n", clk_freq); - return -EINVAL; + ret = -EINVAL; + goto fail_dma; } qup->base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(qup->base)) - return PTR_ERR(qup->base); + if (IS_ERR(qup->base)) { + ret = PTR_ERR(qup->base); + goto fail_dma; + } qup->irq = platform_get_irq(pdev, 0); - if (qup->irq < 0) - return qup->irq; + if (qup->irq < 0) { + ret = qup->irq; + goto fail_dma; + } if (has_acpi_companion(qup->dev)) { ret = device_property_read_u32(qup->dev, @@ -1775,13 +1780,15 @@ static int qup_i2c_probe(struct platform_device *pdev) qup->clk = devm_clk_get(qup->dev, "core"); if (IS_ERR(qup->clk)) { dev_err(qup->dev, "Could not get core clock\n"); - return PTR_ERR(qup->clk); + ret = PTR_ERR(qup->clk); + goto fail_dma; } qup->pclk = devm_clk_get(qup->dev, "iface"); if (IS_ERR(qup->pclk)) { dev_err(qup->dev, "Could not get iface clock\n"); - return PTR_ERR(qup->pclk); + ret = PTR_ERR(qup->pclk); + goto fail_dma; } qup_i2c_enable_clocks(qup); src_clk_freq = clk_get_rate(qup->clk); From 8482ac2e5a261f59675e2f1488fdf332d39c3626 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Tue, 6 Jun 2023 16:41:02 -0700 Subject: [PATCH 281/509] NFSD: add encoding of op_recall flag for write delegation commit 58f5d894006d82ed7335e1c37182fbc5f08c2f51 upstream. Modified nfsd4_encode_open to encode the op_recall flag properly for OPEN result with write delegation granted. Signed-off-by: Dai Ngo Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1e3410a9b03e..c7e8e641d3e5 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3705,7 +3705,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op p = xdr_reserve_space(xdr, 32); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(0); + *p++ = cpu_to_be32(open->op_recall); /* * TODO: space_limit's in delegations From 28e649dc9947e6525c95e32aa9a8e147925e3f56 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 11 Jun 2023 21:14:09 -0600 Subject: [PATCH 282/509] io_uring: wait interruptibly for request completions on exit commit 4826c59453b3b4677d6bf72814e7ababdea86949 upstream. WHen the ring exits, cleanup is done and the final cancelation and waiting on completions is done by io_ring_exit_work. That function is invoked by kworker, which doesn't take any signals. Because of that, it doesn't really matter if we wait for completions in TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE state. However, it does matter to the hung task detection checker! Normally we expect cancelations and completions to happen rather quickly. Some test cases, however, will exit the ring and park the owning task stopped (eg via SIGSTOP). If the owning task needs to run task_work to complete requests, then io_ring_exit_work won't make any progress until the task is runnable again. Hence io_ring_exit_work can trigger the hung task detection, which is particularly problematic if panic-on-hung-task is enabled. As the ring exit doesn't take signals to begin with, have it wait interruptibly rather than uninterruptibly. io_uring has a separate stuck-exit warning that triggers independently anyway, so we're not really missing anything by making this switch. Cc: stable@vger.kernel.org # 5.10+ Link: https://lore.kernel.org/r/b0e4aaef-7088-56ce-244c-976edeac0e66@kernel.dk Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index bce80ed27c4d..25f201fc3ffd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -9543,7 +9543,18 @@ static void io_ring_exit_work(struct work_struct *work) /* there is little hope left, don't run it too often */ interval = HZ * 60; } - } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); + /* + * This is really an uninterruptible wait, as it has to be + * complete. But it's also run from a kworker, which doesn't + * take signals, so it's fine to make it interruptible. This + * avoids scenarios where we knowingly can wait much longer + * on completions, for example if someone does a SIGSTOP on + * a task that needs to finish task_work to make this loop + * complete. That's a synthetic situation that should not + * cause a stuck task backtrace, and hence a potential panic + * on stuck tasks if that is enabled. + */ + } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval)); init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); @@ -9568,7 +9579,12 @@ static void io_ring_exit_work(struct work_struct *work) wake_up_process(node->task); mutex_unlock(&ctx->uring_lock); - wait_for_completion(&exit.completion); + /* + * See comment above for + * wait_for_completion_interruptible_timeout() on why this + * wait is marked as interruptible. + */ + wait_for_completion_interruptible(&exit.completion); mutex_lock(&ctx->uring_lock); } mutex_unlock(&ctx->uring_lock); From 6f9708e5c110a88750b3ca3b7013679f149bb1b0 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Mon, 19 Jun 2023 21:35:58 +0200 Subject: [PATCH 283/509] mmc: core: disable TRIM on Kingston EMMC04G-M627 commit f1738a1f816233e6dfc2407f24a31d596643fd90 upstream. It seems that Kingston EMMC04G-M627 despite advertising TRIM support does not work when the core is trying to use REQ_OP_WRITE_ZEROES. We are seeing I/O errors in OpenWrt under 6.1 on Zyxel NBG7815 that we did not previously have and tracked it down to REQ_OP_WRITE_ZEROES. Trying to use fstrim seems to also throw errors like: [93010.835112] I/O error, dev loop0, sector 16902 op 0x3:(DISCARD) flags 0x800 phys_seg 1 prio class 2 Disabling TRIM makes the error go away, so lets add a quirk for this eMMC to disable TRIM. Signed-off-by: Robert Marko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230619193621.437358-1-robimarko@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/quirks.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index c8c0f50a2076..3da45dd05666 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -99,6 +99,13 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), + /* + * Kingston EMMC04G-M627 advertises TRIM but it does not seems to + * support being used to offload WRITE_ZEROES. + */ + MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, + MMC_QUIRK_TRIM_BROKEN), + /* * Some SD cards reports discard support while they don't */ From 02c8c2b5f68086c2aa7883f23d35a28ed0993725 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Tue, 30 May 2023 23:32:59 +0200 Subject: [PATCH 284/509] mmc: core: disable TRIM on Micron MTFC4GACAJCN-1M commit dbfbddcddcebc9ce8a08757708d4e4a99d238e44 upstream. It seems that Micron MTFC4GACAJCN-1M despite advertising TRIM support does not work when the core is trying to use REQ_OP_WRITE_ZEROES. We are seeing the following errors in OpenWrt under 6.1 on Qnap Qhora 301W that we did not previously have and tracked it down to REQ_OP_WRITE_ZEROES: [ 18.085950] I/O error, dev loop0, sector 596 op 0x9:(WRITE_ZEROES) flags 0x800 phys_seg 0 prio class 2 Disabling TRIM makes the error go away, so lets add a quirk for this eMMC to disable TRIM. Signed-off-by: Robert Marko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230530213259.1776512-1-robimarko@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/quirks.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index 3da45dd05666..afe8d8c5fa8a 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -106,6 +106,13 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("M62704", CID_MANFID_KINGSTON, 0x0100, add_quirk_mmc, MMC_QUIRK_TRIM_BROKEN), + /* + * Micron MTFC4GACAJCN-1M advertises TRIM but it does not seems to + * support being used to offload WRITE_ZEROES. + */ + MMC_FIXUP("Q2J54A", CID_MANFID_MICRON, 0x014e, add_quirk_mmc, + MMC_QUIRK_TRIM_BROKEN), + /* * Some SD cards reports discard support while they don't */ From 191628e2d96ae92d438573028d1084ea3b1b79c4 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 12 Jun 2023 16:37:30 +0200 Subject: [PATCH 285/509] mmc: mmci: Set PROBE_PREFER_ASYNCHRONOUS commit 3108eb2e8aa7e955a9dd3a4c1bf19a7898961822 upstream. All mmc host drivers should have the asynchronous probe option enabled, but it seems like we failed to set it for mmci, so let's do that now. Fixes: 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") Signed-off-by: Ulf Hansson Tested-by: Linus Walleij Tested-by: Yann Gautier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230612143730.210390-1-ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 9cfffcecc9eb..a72ca7416138 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -2386,6 +2386,7 @@ static struct amba_driver mmci_driver = { .drv = { .name = DRIVER_NAME, .pm = &mmci_dev_pm_ops, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, .probe = mmci_probe, .remove = mmci_remove, From dc3287206a32e935151be592d15aa587ed60f773 Mon Sep 17 00:00:00 2001 From: Chevron Li Date: Tue, 23 May 2023 19:11:14 +0800 Subject: [PATCH 286/509] mmc: sdhci: fix DMA configure compatibility issue when 64bit DMA mode is used. commit 20dbd07ef0a8bc29eb03d6a95258ac8934cbe52d upstream. Bayhub SD host has hardware limitation: 1.The upper 32bit address is inhibited to be written at SD Host Register [03E][13]=0 (32bits addressing) mode, is admitted to be written only at SD Host Register [03E][13]=1 (64bits addressing) mode. 2.Because of above item#1, need to configure SD Host Register [03E][13] to 1(64bits addressing mode) before set 64bit ADMA system address's higher 32bits SD Host Register [05F~05C] if 64 bits addressing mode is used. The hardware limitation is reasonable for below reasons: 1.Normal flow should set DMA working mode first, then do DMA-transfer-related configuration, such as system address. 2.The hardware limitation may avoid the software to configure wrong higher 32bit address at 32bits addressing mode although it is redundant. The change that set 32bits/64bits addressing mode before set ADMA address, has no side-effect to other host IPs for below reason: The setting order is reasonable and standard: DMA Mode setting first and then DMA address setting. It meets all DMA setting sequence. Signed-off-by: Chevron Li Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230523111114.18124-1-chevron_li@126.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 133f0d376480..bad01cc6823f 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1145,6 +1145,8 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } } + sdhci_config_dma(host); + if (host->flags & SDHCI_REQ_USE_DMA) { int sg_cnt = sdhci_pre_dma_transfer(host, data, COOKIE_MAPPED); @@ -1164,8 +1166,6 @@ static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd) } } - sdhci_config_dma(host); - if (!(host->flags & SDHCI_REQ_USE_DMA)) { int flags; From bcb295778afda4f2feb0d3c0289a53fd43d5a3a6 Mon Sep 17 00:00:00 2001 From: Mingzhe Zou Date: Thu, 15 Jun 2023 20:12:23 +0800 Subject: [PATCH 287/509] bcache: fixup btree_cache_wait list damage commit f0854489fc07d2456f7cc71a63f4faf9c716ffbe upstream. We get a kernel crash about "list_add corruption. next->prev should be prev (ffff9c801bc01210), but was ffff9c77b688237c. (next=ffffae586d8afe68)." crash> struct list_head 0xffff9c801bc01210 struct list_head { next = 0xffffae586d8afe68, prev = 0xffffae586d8afe68 } crash> struct list_head 0xffff9c77b688237c struct list_head { next = 0x0, prev = 0x0 } crash> struct list_head 0xffffae586d8afe68 struct list_head struct: invalid kernel virtual address: ffffae586d8afe68 type: "gdb_readmem_callback" Cannot access memory at address 0xffffae586d8afe68 [230469.019492] Call Trace: [230469.032041] prepare_to_wait+0x8a/0xb0 [230469.044363] ? bch_btree_keys_free+0x6c/0xc0 [escache] [230469.056533] mca_cannibalize_lock+0x72/0x90 [escache] [230469.068788] mca_alloc+0x2ae/0x450 [escache] [230469.080790] bch_btree_node_get+0x136/0x2d0 [escache] [230469.092681] bch_btree_check_thread+0x1e1/0x260 [escache] [230469.104382] ? finish_wait+0x80/0x80 [230469.115884] ? bch_btree_check_recurse+0x1a0/0x1a0 [escache] [230469.127259] kthread+0x112/0x130 [230469.138448] ? kthread_flush_work_fn+0x10/0x10 [230469.149477] ret_from_fork+0x35/0x40 bch_btree_check_thread() and bch_dirty_init_thread() may call mca_cannibalize() to cannibalize other cached btree nodes. Only one thread can do it at a time, so the op of other threads will be added to the btree_cache_wait list. We must call finish_wait() to remove op from btree_cache_wait before free it's memory address. Otherwise, the list will be damaged. Also should call bch_cannibalize_unlock() to release the btree_cache_alloc_lock and wake_up other waiters. Fixes: 8e7102273f59 ("bcache: make bch_btree_check() to be multithreaded") Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded") Cc: stable@vger.kernel.org Signed-off-by: Mingzhe Zou Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20230615121223.22502-7-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 11 ++++++++++- drivers/md/bcache/btree.h | 1 + drivers/md/bcache/writeback.c | 10 ++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index b47c00dea0f2..9a5c5fbe9b85 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -885,7 +885,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op, * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ -static void bch_cannibalize_unlock(struct cache_set *c) +void bch_cannibalize_unlock(struct cache_set *c) { spin_lock(&c->btree_cannibalize_lock); if (c->btree_cache_alloc_lock == current) { @@ -1968,6 +1968,15 @@ static int bch_btree_check_thread(void *arg) c->gc_stats.nodes++; bch_btree_op_init(&op, 0); ret = bcache_btree(check_recurse, p, c->root, &op); + /* + * The op may be added to cache_set's btree_cache_wait + * in mca_cannibalize(), must ensure it is removed from + * the list and release btree_cache_alloc_lock before + * free op memory. + * Otherwise, the btree_cache_wait will be damaged. + */ + bch_cannibalize_unlock(c); + finish_wait(&c->btree_cache_wait, &(&op)->wait); if (ret) goto out; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 1b5fdbc0d83e..a2920bbfcad5 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -282,6 +282,7 @@ void bch_initial_gc_finish(struct cache_set *c); void bch_moving_gc(struct cache_set *c); int bch_btree_check(struct cache_set *c); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); +void bch_cannibalize_unlock(struct cache_set *c); static inline void wake_up_gc(struct cache_set *c) { diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 3aa73da2c67b..6324c922f6ba 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -834,6 +834,16 @@ static int bch_root_node_dirty_init(struct cache_set *c, if (ret < 0) pr_warn("sectors dirty init failed, ret=%d!\n", ret); + /* + * The op may be added to cache_set's btree_cache_wait + * in mca_cannibalize(), must ensure it is removed from + * the list and release btree_cache_alloc_lock before + * free op memory. + * Otherwise, the btree_cache_wait will be damaged. + */ + bch_cannibalize_unlock(c); + finish_wait(&c->btree_cache_wait, &(&op.op)->wait); + return ret; } From db9439cef0b5efccf8021fe89f4953e0f901e85b Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 15 Jun 2023 20:12:21 +0800 Subject: [PATCH 288/509] bcache: Remove unnecessary NULL point check in node allocations commit 028ddcac477b691dd9205c92f991cc15259d033e upstream. Due to the previous fix of __bch_btree_node_alloc, the return value will never be a NULL pointer. So IS_ERR is enough to handle the failure situation. Fix it by replacing IS_ERR_OR_NULL check by an IS_ERR check. Fixes: cafe56359144 ("bcache: A block layer cache") Cc: stable@vger.kernel.org Signed-off-by: Zheng Wang Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20230615121223.22502-5-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 10 +++++----- drivers/md/bcache/super.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9a5c5fbe9b85..941c15a0f32d 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1138,7 +1138,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); bkey_copy_key(&n->key, &b->key); @@ -1340,7 +1340,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, memset(new_nodes, 0, sizeof(new_nodes)); closure_init_stack(&cl); - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) keys += r[nodes++].keys; blocks = btree_default_blocks(b->c) * 2 / 3; @@ -1352,7 +1352,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, for (i = 0; i < nodes; i++) { new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL); - if (IS_ERR_OR_NULL(new_nodes[i])) + if (IS_ERR(new_nodes[i])) goto out_nocoalesce; } @@ -1487,7 +1487,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, bch_keylist_free(&keylist); for (i = 0; i < nodes; i++) - if (!IS_ERR_OR_NULL(new_nodes[i])) { + if (!IS_ERR(new_nodes[i])) { btree_node_free(new_nodes[i]); rw_unlock(true, new_nodes[i]); } @@ -1669,7 +1669,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, if (should_rewrite) { n = btree_node_alloc_replacement(b, NULL); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { bch_btree_node_write_sync(n); bch_btree_set_root(n); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7f5ea2509643..e8c8077667f9 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1748,7 +1748,7 @@ static void cache_set_flush(struct closure *cl) if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); - if (!IS_ERR_OR_NULL(c->root)) + if (!IS_ERR(c->root)) list_add(&c->root->list, &c->btree_cache); /* @@ -2112,7 +2112,7 @@ static int run_cache_set(struct cache_set *c) err = "cannot allocate new btree root"; c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; mutex_lock(&c->root->write_lock); From a4405f6ee03323410d7b10966fd67b35f71b1944 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 15 Jun 2023 20:12:22 +0800 Subject: [PATCH 289/509] bcache: Fix __bch_btree_node_alloc to make the failure behavior consistent commit 80fca8a10b604afad6c14213fdfd816c4eda3ee4 upstream. In some specific situations, the return value of __bch_btree_node_alloc may be NULL. This may lead to a potential NULL pointer dereference in caller function like a calling chain : btree_split->bch_btree_node_alloc->__bch_btree_node_alloc. Fix it by initializing the return value in __bch_btree_node_alloc. Fixes: cafe56359144 ("bcache: A block layer cache") Cc: stable@vger.kernel.org Signed-off-by: Zheng Wang Signed-off-by: Coly Li Link: https://lore.kernel.org/r/20230615121223.22502-6-colyli@suse.de Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 941c15a0f32d..24c57bb85b35 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1090,10 +1090,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, struct btree *parent) { BKEY_PADDED(key) k; - struct btree *b = ERR_PTR(-EAGAIN); + struct btree *b; mutex_lock(&c->bucket_lock); retry: + /* return ERR_PTR(-EAGAIN) when it fails */ + b = ERR_PTR(-EAGAIN); if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) goto err; From 9658a03f80b24f40ff6329b52ffd484145187b6b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 6 Jun 2023 15:24:45 -0700 Subject: [PATCH 290/509] um: Use HOST_DIR for mrproper commit a5a319ec2c2236bb96d147c16196d2f1f3799301 upstream. When HEADER_ARCH was introduced, the MRPROPER_FILES (then MRPROPER_DIRS) list wasn't adjusted, leaving SUBARCH as part of the path argument. This resulted in the "mrproper" target not cleaning up arch/x86/... when SUBARCH was specified. Since HOST_DIR is arch/$(HEADER_ARCH), use it instead to get the correct path. Cc: Richard Weinberger Cc: Anton Ivanov Cc: Johannes Berg Cc: Azeem Shaikh Cc: linux-um@lists.infradead.org Fixes: 7bbe7204e937 ("um: merge Makefile-{i386,x86_64}") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230606222442.never.807-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/um/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 775615141339..56e5320da762 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -147,7 +147,7 @@ export LDFLAGS_vmlinux := $(LDFLAGS_EXECSTACK) # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. CLEAN_FILES += linux x.i gmon.out -MRPROPER_FILES += arch/$(SUBARCH)/include/generated +MRPROPER_FILES += $(HOST_DIR)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ From 8bf91a8d4871d7b1a82c76c0ee51f6789ed4f0b2 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 1 Jun 2023 14:42:44 +0800 Subject: [PATCH 291/509] integrity: Fix possible multiple allocation in integrity_inode_get() commit 9df6a4870dc371136e90330cfbbc51464ee66993 upstream. When integrity_inode_get() is querying and inserting the cache, there is a conditional race in the concurrent environment. The race condition is the result of not properly implementing "double-checked locking". In this case, it first checks to see if the iint cache record exists before taking the lock, but doesn't check again after taking the integrity_iint_lock. Fixes: bf2276d10ce5 ("ima: allocating iint improvements") Signed-off-by: Tianjia Zhang Cc: Dmitry Kasatkin Cc: # v3.10+ Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman --- security/integrity/iint.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 0ba01847e836..9ed2d5bfbed5 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -43,12 +43,10 @@ static struct integrity_iint_cache *__integrity_iint_find(struct inode *inode) else if (inode > iint->inode) n = n->rb_right; else - break; + return iint; } - if (!n) - return NULL; - return iint; + return NULL; } /* @@ -121,10 +119,15 @@ struct integrity_iint_cache *integrity_inode_get(struct inode *inode) parent = *p; test_iint = rb_entry(parent, struct integrity_iint_cache, rb_node); - if (inode < test_iint->inode) + if (inode < test_iint->inode) { p = &(*p)->rb_left; - else + } else if (inode > test_iint->inode) { p = &(*p)->rb_right; + } else { + write_unlock(&integrity_iint_lock); + kmem_cache_free(iint_cache, iint); + return test_iint; + } } iint->inode = inode; From 79bef379d55a66deaad5f491af4e4261b61b2470 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 23 May 2023 10:19:35 +0200 Subject: [PATCH 292/509] autofs: use flexible array in ioctl structure commit e910c8e3aa02dc456e2f4c32cb479523c326b534 upstream. Commit df8fc4e934c1 ("kbuild: Enable -fstrict-flex-arrays=3") introduced a warning for the autofs_dev_ioctl structure: In function 'check_name', inlined from 'validate_dev_ioctl' at fs/autofs/dev-ioctl.c:131:9, inlined from '_autofs_dev_ioctl' at fs/autofs/dev-ioctl.c:624:8: fs/autofs/dev-ioctl.c:33:14: error: 'strchr' reading 1 or more bytes from a region of size 0 [-Werror=stringop-overread] 33 | if (!strchr(name, '/')) | ^~~~~~~~~~~~~~~~~ In file included from include/linux/auto_dev-ioctl.h:10, from fs/autofs/autofs_i.h:10, from fs/autofs/dev-ioctl.c:14: include/uapi/linux/auto_dev-ioctl.h: In function '_autofs_dev_ioctl': include/uapi/linux/auto_dev-ioctl.h:112:14: note: source object 'path' of size 0 112 | char path[0]; | ^~~~ This is easily fixed by changing the gnu 0-length array into a c99 flexible array. Since this is a uapi structure, we have to be careful about possible regressions but this one should be fine as they are equivalent here. While it would break building with ancient gcc versions that predate c99, it helps building with --std=c99 and -Wpedantic builds in user space, as well as non-gnu compilers. This means we probably also want it fixed in stable kernels. Cc: stable@vger.kernel.org Cc: Kees Cook Cc: "Gustavo A. R. Silva" Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230523081944.581710-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/autofs-mount-control.rst | 2 +- Documentation/filesystems/autofs.rst | 2 +- include/uapi/linux/auto_dev-ioctl.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/filesystems/autofs-mount-control.rst b/Documentation/filesystems/autofs-mount-control.rst index bf4b511cdbe8..b5a379d25c40 100644 --- a/Documentation/filesystems/autofs-mount-control.rst +++ b/Documentation/filesystems/autofs-mount-control.rst @@ -196,7 +196,7 @@ information and return operation results:: struct args_ismountpoint ismountpoint; }; - char path[0]; + char path[]; }; The ioctlfd field is a mount point file descriptor of an autofs mount diff --git a/Documentation/filesystems/autofs.rst b/Documentation/filesystems/autofs.rst index 681c6a492bc0..1b495768e7aa 100644 --- a/Documentation/filesystems/autofs.rst +++ b/Documentation/filesystems/autofs.rst @@ -467,7 +467,7 @@ Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:: struct args_ismountpoint ismountpoint; }; - char path[0]; + char path[]; }; For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 62e625356dc8..08be539605fc 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -109,7 +109,7 @@ struct autofs_dev_ioctl { struct args_ismountpoint ismountpoint; }; - char path[0]; + char path[]; }; static inline void init_autofs_dev_ioctl(struct autofs_dev_ioctl *in) From 5fada375113767b3b57f1b04f7a4fe64ffaa626f Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 7 Jun 2023 18:15:23 +0200 Subject: [PATCH 293/509] shmem: use ramfs_kill_sb() for kill_sb method of ramfs-based tmpfs commit 36ce9d76b0a93bae799e27e4f5ac35478c676592 upstream. As the ramfs-based tmpfs uses ramfs_init_fs_context() for the init_fs_context method, which allocates fc->s_fs_info, use ramfs_kill_sb() to free it and avoid a memory leak. Link: https://lkml.kernel.org/r/20230607161523.2876433-1-roberto.sassu@huaweicloud.com Fixes: c3b1b1cbf002 ("ramfs: add support for "mode=" mount option") Signed-off-by: Roberto Sassu Cc: Hugh Dickins Cc: David Howells Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/ramfs/inode.c | 2 +- include/linux/ramfs.h | 1 + mm/shmem.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index ee179a81b3da..23d2c333066f 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -264,7 +264,7 @@ int ramfs_init_fs_context(struct fs_context *fc) return 0; } -static void ramfs_kill_sb(struct super_block *sb) +void ramfs_kill_sb(struct super_block *sb) { kfree(sb->s_fs_info); kill_litter_super(sb); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 917528d102c4..d506dc63dd47 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -7,6 +7,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir, umode_t mode, dev_t dev); extern int ramfs_init_fs_context(struct fs_context *fc); +extern void ramfs_kill_sb(struct super_block *sb); #ifdef CONFIG_MMU static inline int diff --git a/mm/shmem.c b/mm/shmem.c index d3d8c5e7a296..cfa8f43cb3a6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4128,7 +4128,7 @@ static struct file_system_type shmem_fs_type = { .name = "tmpfs", .init_fs_context = ramfs_init_fs_context, .parameters = ramfs_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; From 988a5d791156450d8d996b3a0a0a754469aa8961 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 6 May 2023 06:56:12 +0200 Subject: [PATCH 294/509] jffs2: reduce stack usage in jffs2_build_xattr_subsystem() commit 1168f095417643f663caa341211e117db552989f upstream. Use kcalloc() for allocation/flush of 128 pointers table to reduce stack usage. Function now returns -ENOMEM or 0 on success. stackusage Before: ./fs/jffs2/xattr.c:775 jffs2_build_xattr_subsystem 1208 dynamic,bounded After: ./fs/jffs2/xattr.c:775 jffs2_build_xattr_subsystem 192 dynamic,bounded Also update definition when CONFIG_JFFS2_FS_XATTR is not enabled Tested with an MTD mount point and some user set/getfattr. Many current target on OpenWRT also suffer from a compilation warning (that become an error with CONFIG_WERROR) with the following output: fs/jffs2/xattr.c: In function 'jffs2_build_xattr_subsystem': fs/jffs2/xattr.c:887:1: error: the frame size of 1088 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] 887 | } | ^ Using dynamic allocation fix this compilation warning. Fixes: c9f700f840bd ("[JFFS2][XATTR] using 'delete marker' for xdatum/xref deletion") Reported-by: Tim Gardner Reported-by: kernel test robot Reported-by: Ron Economos Reported-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Fabian Frederick Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org Message-Id: <20230506045612.16616-1-ansuelsmth@gmail.com> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/build.c | 5 ++++- fs/jffs2/xattr.c | 13 +++++++++---- fs/jffs2/xattr.h | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index 837cd55fd4c5..6ae9d6fefb86 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) ic->scan_dents = NULL; cond_resched(); } - jffs2_build_xattr_subsystem(c); + ret = jffs2_build_xattr_subsystem(c); + if (ret) + goto exit; + c->flags &= ~JFFS2_SB_FLAG_BUILDING; dbg_fsbuild("FS build complete\n"); diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index da3e18503c65..acb4492f5970 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c) } #define XREF_TMPHASH_SIZE (128) -void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) +int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) { struct jffs2_xattr_ref *ref, *_ref; - struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE]; + struct jffs2_xattr_ref **xref_tmphash; struct jffs2_xattr_datum *xd, *_xd; struct jffs2_inode_cache *ic; struct jffs2_raw_node_ref *raw; @@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING)); + xref_tmphash = kcalloc(XREF_TMPHASH_SIZE, + sizeof(struct jffs2_xattr_ref *), GFP_KERNEL); + if (!xref_tmphash) + return -ENOMEM; + /* Phase.1 : Merge same xref */ - for (i=0; i < XREF_TMPHASH_SIZE; i++) - xref_tmphash[i] = NULL; for (ref=c->xref_temp; ref; ref=_ref) { struct jffs2_xattr_ref *tmp; @@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c) "%u of xref (%u dead, %u orphan) found.\n", xdatum_count, xdatum_unchecked_count, xdatum_orphan_count, xref_count, xref_dead_count, xref_orphan_count); + kfree(xref_tmphash); + return 0; } struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h index 720007b2fd65..1b5030a3349d 100644 --- a/fs/jffs2/xattr.h +++ b/fs/jffs2/xattr.h @@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref) #ifdef CONFIG_JFFS2_FS_XATTR extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c); -extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); +extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c); extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c); extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c, @@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t); #else #define jffs2_init_xattr_subsystem(c) -#define jffs2_build_xattr_subsystem(c) +#define jffs2_build_xattr_subsystem(c) (0) #define jffs2_clear_xattr_subsystem(c) #define jffs2_xattr_do_crccheck_inode(c, ic) From 5e7d18a52c889b5a762c7dc8c0242fd2e1cdd195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 7 Jun 2023 19:28:48 +0200 Subject: [PATCH 295/509] fs: avoid empty option when generating legacy mount string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 62176420274db5b5127cd7a0083a9aeb461756ee upstream. As each option string fragment is always prepended with a comma it would happen that the whole string always starts with a comma. This could be interpreted by filesystem drivers as an empty option and may produce errors. For example the NTFS driver from ntfs.ko behaves like this and fails when mounted via the new API. Link: https://github.com/util-linux/util-linux/issues/2298 Signed-off-by: Thomas WeiƟschuh Fixes: 3e1aeb00e6d1 ("vfs: Implement a filesystem superblock creation/configuration context") Cc: stable@vger.kernel.org Message-Id: <20230607-fs-empty-option-v1-1-20c8dbf4671b@weissschuh.net> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/fs_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index 740322dff4a3..e6ef7e040743 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -543,7 +543,8 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) return -ENOMEM; } - ctx->legacy_data[size++] = ','; + if (size) + ctx->legacy_data[size++] = ','; len = strlen(param->key); memcpy(ctx->legacy_data + size, param->key, len); size += len; From 2b563acd2dfaeef839661716498bfaa484cda9c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:21 +0200 Subject: [PATCH 296/509] ext4: Remove ext4 locking of moved directory commit 3658840cd363f2be094f5dfd2f0b174a9055dd0f upstream. Remove locking of moved directory in ext4_rename2(). We will take care of it in VFS instead. This effectively reverts commit 0813299c586b ("ext4: Fix possible corruption when moving a directory") and followup fixes. CC: Ted Tso CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-1-jack@suse.cz> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/ext4/namei.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 45f719c1e002..ca3d2a33a08c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3863,19 +3863,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } - /* - * We need to protect against old.inode directory getting converted - * from inline directory format into a normal one. - */ - if (S_ISDIR(old.inode->i_mode)) - inode_lock_nested(old.inode, I_MUTEX_NONDIR2); - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, &old.inlined); - if (IS_ERR(old.bh)) { - retval = PTR_ERR(old.bh); - goto unlock_moved_dir; - } + if (IS_ERR(old.bh)) + return PTR_ERR(old.bh); /* * Check for inode number is _not_ due to possible IO errors. @@ -4065,10 +4056,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, brelse(old.bh); brelse(new.bh); -unlock_moved_dir: - if (S_ISDIR(old.inode->i_mode)) - inode_unlock(old.inode); - return retval; } From 4b03f503b730ac44e404fb39f2b0e53603e3745a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:23 +0200 Subject: [PATCH 297/509] Revert "f2fs: fix potential corruption when moving a directory" commit cde3c9d7e2a359e337216855dcb333a19daaa436 upstream. This reverts commit d94772154e524b329a168678836745d2773a6e02. The locking is going to be provided by VFS. CC: Jaegeuk Kim CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-3-jack@suse.cz> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/namei.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98263180c0ea..72b109685db4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -969,20 +969,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } - /* - * Copied from ext4_rename: we need to protect against old.inode - * directory getting converted from inline directory format into - * a normal one. - */ - if (S_ISDIR(old_inode->i_mode)) - inode_lock_nested(old_inode, I_MUTEX_NONDIR2); - err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) err = PTR_ERR(old_page); - goto out_unlock_old; + goto out; } if (S_ISDIR(old_inode->i_mode)) { @@ -1090,9 +1082,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_unlock_op(sbi); - if (S_ISDIR(old_inode->i_mode)) - inode_unlock(old_inode); - if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); @@ -1107,9 +1096,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); -out_unlock_old: - if (S_ISDIR(old_inode->i_mode)) - inode_unlock(old_inode); out: if (whiteout) iput(whiteout); From c5b5e72df13dabbf6187305b08453d3b58d1736f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:24 +0200 Subject: [PATCH 298/509] fs: Establish locking order for unrelated directories commit f23ce757185319886ca80c4864ce5f81ac6cc9e9 upstream. Currently the locking order of inode locks for directories that are not in ancestor relationship is not defined because all operations that needed to lock two directories like this were serialized by sb->s_vfs_rename_mutex. However some filesystems need to lock two subdirectories for RENAME_EXCHANGE operations and for this we need the locking order established even for two tree-unrelated directories. Provide a helper function lock_two_inodes() that establishes lock ordering for any two inodes and use it in lock_two_directories(). CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-4-jack@suse.cz> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/inode.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 2 ++ fs/namei.c | 4 ++-- 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 7ec90788d8be..311237e8d359 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1015,6 +1015,48 @@ void discard_new_inode(struct inode *inode) } EXPORT_SYMBOL(discard_new_inode); +/** + * lock_two_inodes - lock two inodes (may be regular files but also dirs) + * + * Lock any non-NULL argument. The caller must make sure that if he is passing + * in two directories, one is not ancestor of the other. Zero, one or two + * objects may be locked by this function. + * + * @inode1: first inode to lock + * @inode2: second inode to lock + * @subclass1: inode lock subclass for the first lock obtained + * @subclass2: inode lock subclass for the second lock obtained + */ +void lock_two_inodes(struct inode *inode1, struct inode *inode2, + unsigned subclass1, unsigned subclass2) +{ + if (!inode1 || !inode2) { + /* + * Make sure @subclass1 will be used for the acquired lock. + * This is not strictly necessary (no current caller cares) but + * let's keep things consistent. + */ + if (!inode1) + swap(inode1, inode2); + goto lock; + } + + /* + * If one object is directory and the other is not, we must make sure + * to lock directory first as the other object may be its child. + */ + if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) { + if (inode1 > inode2) + swap(inode1, inode2); + } else if (!S_ISDIR(inode1->i_mode)) + swap(inode1, inode2); +lock: + if (inode1) + inode_lock_nested(inode1, subclass1); + if (inode2 && inode2 != inode1) + inode_lock_nested(inode2, subclass2); +} + /** * lock_two_nondirectories - take two i_mutexes on non-directory objects * diff --git a/fs/internal.h b/fs/internal.h index d5d9fcdae10c..39193250a4ed 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -150,6 +150,8 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); bool in_group_or_capable(const struct inode *inode, kgid_t gid); +void lock_two_inodes(struct inode *inode1, struct inode *inode2, + unsigned subclass1, unsigned subclass2); /* * fs-writeback.c diff --git a/fs/namei.c b/fs/namei.c index 3d98db9802a7..be5cedfd6926 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2782,8 +2782,8 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) return p; } - inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); - inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); + lock_two_inodes(p1->d_inode, p2->d_inode, + I_MUTEX_PARENT, I_MUTEX_PARENT2); return NULL; } EXPORT_SYMBOL(lock_rename); From 59efb86711051d33388b9615990a83cc4452d961 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 1 Jun 2023 12:58:25 +0200 Subject: [PATCH 299/509] fs: Lock moved directories commit 28eceeda130f5058074dd007d9c59d2e8bc5af2e upstream. When a directory is moved to a different directory, some filesystems (udf, ext4, ocfs2, f2fs, and likely gfs2, reiserfs, and others) need to update their pointer to the parent and this must not race with other operations on the directory. Lock the directories when they are moved. Although not all filesystems need this locking, we perform it in vfs_rename() because getting the lock ordering right is really difficult and we don't want to expose these locking details to filesystems. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Message-Id: <20230601105830.13168-5-jack@suse.cz> Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- .../filesystems/directory-locking.rst | 26 ++++++++++--------- fs/namei.c | 22 ++++++++++------ 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst index 504ba940c36c..dccd61c7c5c3 100644 --- a/Documentation/filesystems/directory-locking.rst +++ b/Documentation/filesystems/directory-locking.rst @@ -22,12 +22,11 @@ exclusive. 3) object removal. Locking rules: caller locks parent, finds victim, locks victim and calls the method. Locks are exclusive. -4) rename() that is _not_ cross-directory. Locking rules: caller locks -the parent and finds source and target. In case of exchange (with -RENAME_EXCHANGE in flags argument) lock both. In any case, -if the target already exists, lock it. If the source is a non-directory, -lock it. If we need to lock both, lock them in inode pointer order. -Then call the method. All locks are exclusive. +4) rename() that is _not_ cross-directory. Locking rules: caller locks the +parent and finds source and target. We lock both (provided they exist). If we +need to lock two inodes of different type (dir vs non-dir), we lock directory +first. If we need to lock two inodes of the same type, lock them in inode +pointer order. Then call the method. All locks are exclusive. NB: we might get away with locking the source (and target in exchange case) shared. @@ -44,15 +43,17 @@ All locks are exclusive. rules: * lock the filesystem - * lock parents in "ancestors first" order. + * lock parents in "ancestors first" order. If one is not ancestor of + the other, lock them in inode pointer order. * find source and target. * if old parent is equal to or is a descendent of target fail with -ENOTEMPTY * if new parent is equal to or is a descendent of source fail with -ELOOP - * If it's an exchange, lock both the source and the target. - * If the target exists, lock it. If the source is a non-directory, - lock it. If we need to lock both, do so in inode pointer order. + * Lock both the source and the target provided they exist. If we + need to lock two inodes of different type (dir vs non-dir), we lock + the directory first. If we need to lock two inodes of the same type, + lock them in inode pointer order. * call the method. All ->i_rwsem are taken exclusive. Again, we might get away with locking @@ -66,8 +67,9 @@ If no directory is its own ancestor, the scheme above is deadlock-free. Proof: - First of all, at any moment we have a partial ordering of the - objects - A < B iff A is an ancestor of B. + First of all, at any moment we have a linear ordering of the + objects - A < B iff (A is an ancestor of B) or (B is not an ancestor + of A and ptr(A) < ptr(B)). That ordering can change. However, the following is true: diff --git a/fs/namei.c b/fs/namei.c index be5cedfd6926..fa7e5511d7c8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4264,7 +4264,7 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we have to lock _four_ objects - parents and victim (if it exists), - * and source (if it is not a directory). + * and source. * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change @@ -4341,10 +4341,16 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); - if (!is_dir || (flags & RENAME_EXCHANGE)) - lock_two_nondirectories(source, target); - else if (target) - inode_lock(target); + /* + * Lock all moved children. Moved directories may need to change parent + * pointer so they need the lock to prevent against concurrent + * directory changes moving parent pointer. For regular files we've + * historically always done this. The lockdep locking subclasses are + * somewhat arbitrary but RENAME_EXCHANGE in particular can swap + * regular files and directories so it's difficult to tell which + * subclasses to use. + */ + lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2); error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) @@ -4388,9 +4394,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, d_exchange(old_dentry, new_dentry); } out: - if (!is_dir || (flags & RENAME_EXCHANGE)) - unlock_two_nondirectories(source, target); - else if (target) + if (source) + inode_unlock(source); + if (target) inode_unlock(target); dput(new_dentry); if (!error) { From a3fbd156bd2cd16e3c64e250ebce33eb9f2ef612 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 5 Jun 2023 16:49:45 -0700 Subject: [PATCH 300/509] btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile commit 160fe8f6fdb13da6111677be6263e5d65e875987 upstream. Callers of `btrfs_reduce_alloc_profile` expect it to return exactly one allocation profile flag, and failing to do so may ultimately result in a WARN_ON and remount-ro when allocating new blocks, like the below transaction abort on 6.1. `btrfs_reduce_alloc_profile` has two ways of determining the profile, first it checks if a conversion balance is currently running and uses the profile we're converting to. If no balance is currently running, it returns the max-redundancy profile which at least one block in the selected block group has. This works by simply checking each known allocation profile bit in redundancy order. However, `btrfs_reduce_alloc_profile` has not been updated as new flags have been added - first with the `DUP` profile and later with the RAID1C34 profiles. Because of the way it checks, if we have blocks with different profiles and at least one is known, that profile will be selected. However, if none are known we may return a flag set with multiple allocation profiles set. This is currently only possible when a balance from one of the three unhandled profiles to another of the unhandled profiles is canceled after allocating at least one block using the new profile. In that case, a transaction abort like the below will occur and the filesystem will need to be mounted with -o skip_balance to get it mounted rw again (but the balance cannot be resumed without a similar abort). [770.648] ------------[ cut here ]------------ [770.648] BTRFS: Transaction aborted (error -22) [770.648] WARNING: CPU: 43 PID: 1159593 at fs/btrfs/extent-tree.c:4122 find_free_extent+0x1d94/0x1e00 [btrfs] [770.648] CPU: 43 PID: 1159593 Comm: btrfs Tainted: G W 6.1.0-0.deb11.7-powerpc64le #1 Debian 6.1.20-2~bpo11+1a~test [770.648] Hardware name: T2P9D01 REV 1.00 POWER9 0x4e1202 opal:skiboot-bc106a0 PowerNV [770.648] NIP: c00800000f6784fc LR: c00800000f6784f8 CTR: c000000000d746c0 [770.648] REGS: c000200089afe9a0 TRAP: 0700 Tainted: G W (6.1.0-0.deb11.7-powerpc64le Debian 6.1.20-2~bpo11+1a~test) [770.648] MSR: 9000000002029033 CR: 28848282 XER: 20040000 [770.648] CFAR: c000000000135110 IRQMASK: 0 GPR00: c00800000f6784f8 c000200089afec40 c00800000f7ea800 0000000000000026 GPR04: 00000001004820c2 c000200089afea00 c000200089afe9f8 0000000000000027 GPR08: c000200ffbfe7f98 c000000002127f90 ffffffffffffffd8 0000000026d6a6e8 GPR12: 0000000028848282 c000200fff7f3800 5deadbeef0000122 c00000002269d000 GPR16: c0002008c7797c40 c000200089afef17 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000001 c000200008bc5a98 0000000000000001 GPR24: 0000000000000000 c0000003c73088d0 c000200089afef17 c000000016d3a800 GPR28: c0000003c7308800 c00000002269d000 ffffffffffffffea 0000000000000001 [770.648] NIP [c00800000f6784fc] find_free_extent+0x1d94/0x1e00 [btrfs] [770.648] LR [c00800000f6784f8] find_free_extent+0x1d90/0x1e00 [btrfs] [770.648] Call Trace: [770.648] [c000200089afec40] [c00800000f6784f8] find_free_extent+0x1d90/0x1e00 [btrfs] (unreliable) [770.648] [c000200089afed30] [c00800000f681398] btrfs_reserve_extent+0x1a0/0x2f0 [btrfs] [770.648] [c000200089afeea0] [c00800000f681bf0] btrfs_alloc_tree_block+0x108/0x670 [btrfs] [770.648] [c000200089afeff0] [c00800000f66bd68] __btrfs_cow_block+0x170/0x850 [btrfs] [770.648] [c000200089aff100] [c00800000f66c58c] btrfs_cow_block+0x144/0x288 [btrfs] [770.648] [c000200089aff1b0] [c00800000f67113c] btrfs_search_slot+0x6b4/0xcb0 [btrfs] [770.648] [c000200089aff2a0] [c00800000f679f60] lookup_inline_extent_backref+0x128/0x7c0 [btrfs] [770.648] [c000200089aff3b0] [c00800000f67b338] lookup_extent_backref+0x70/0x190 [btrfs] [770.648] [c000200089aff470] [c00800000f67b54c] __btrfs_free_extent+0xf4/0x1490 [btrfs] [770.648] [c000200089aff5a0] [c00800000f67d770] __btrfs_run_delayed_refs+0x328/0x1530 [btrfs] [770.648] [c000200089aff740] [c00800000f67ea2c] btrfs_run_delayed_refs+0xb4/0x3e0 [btrfs] [770.648] [c000200089aff800] [c00800000f699aa4] btrfs_commit_transaction+0x8c/0x12b0 [btrfs] [770.648] [c000200089aff8f0] [c00800000f6dc628] reset_balance_state+0x1c0/0x290 [btrfs] [770.648] [c000200089aff9a0] [c00800000f6e2f7c] btrfs_balance+0x1164/0x1500 [btrfs] [770.648] [c000200089affb40] [c00800000f6f8e4c] btrfs_ioctl+0x2b54/0x3100 [btrfs] [770.648] [c000200089affc80] [c00000000053be14] sys_ioctl+0x794/0x1310 [770.648] [c000200089affd70] [c00000000002af98] system_call_exception+0x138/0x250 [770.648] [c000200089affe10] [c00000000000c654] system_call_common+0xf4/0x258 [770.648] --- interrupt: c00 at 0x7fff94126800 [770.648] NIP: 00007fff94126800 LR: 0000000107e0b594 CTR: 0000000000000000 [770.648] REGS: c000200089affe80 TRAP: 0c00 Tainted: G W (6.1.0-0.deb11.7-powerpc64le Debian 6.1.20-2~bpo11+1a~test) [770.648] MSR: 900000000000d033 CR: 24002848 XER: 00000000 [770.648] IRQMASK: 0 GPR00: 0000000000000036 00007fffc9439da0 00007fff94217100 0000000000000003 GPR04: 00000000c4009420 00007fffc9439ee8 0000000000000000 0000000000000000 GPR08: 00000000803c7416 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000000000 00007fff9467d120 0000000107e64c9c 0000000107e64d0a GPR16: 0000000107e64d06 0000000107e64cf1 0000000107e64cc4 0000000107e64c73 GPR20: 0000000107e64c31 0000000107e64bf1 0000000107e64be7 0000000000000000 GPR24: 0000000000000000 00007fffc9439ee0 0000000000000003 0000000000000001 GPR28: 00007fffc943f713 0000000000000000 00007fffc9439ee8 0000000000000000 [770.648] NIP [00007fff94126800] 0x7fff94126800 [770.648] LR [0000000107e0b594] 0x107e0b594 [770.648] --- interrupt: c00 [770.648] Instruction dump: [770.648] 3b00ffe4 e8898828 481175f5 60000000 4bfff4fc 3be00000 4bfff570 3d220000 [770.648] 7fc4f378 e8698830 4811cd95 e8410018 <0fe00000> f9c10060 f9e10068 fa010070 [770.648] ---[ end trace 0000000000000000 ]--- [770.648] BTRFS: error (device dm-2: state A) in find_free_extent_update_loop:4122: errno=-22 unknown [770.648] BTRFS info (device dm-2: state EA): forced readonly [770.648] BTRFS: error (device dm-2: state EA) in __btrfs_free_extent:3070: errno=-22 unknown [770.648] BTRFS error (device dm-2: state EA): failed to run delayed ref for logical 17838685708288 num_bytes 24576 type 184 action 2 ref_mod 1: -22 [770.648] BTRFS: error (device dm-2: state EA) in btrfs_run_delayed_refs:2144: errno=-22 unknown [770.648] BTRFS: error (device dm-2: state EA) in reset_balance_state:3599: errno=-22 unknown Fixes: 47e6f7423b91 ("btrfs: add support for 3-copy replication (raid1c3)") Fixes: 8d6fac0087e5 ("btrfs: add support for 4-copy replication (raid1c4)") CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Matt Corallo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index d0fecbd28232..c4e3c1a5de05 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -77,14 +77,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags) } allowed &= flags; - if (allowed & BTRFS_BLOCK_GROUP_RAID6) + /* Select the highest-redundancy RAID level. */ + if (allowed & BTRFS_BLOCK_GROUP_RAID1C4) + allowed = BTRFS_BLOCK_GROUP_RAID1C4; + else if (allowed & BTRFS_BLOCK_GROUP_RAID6) allowed = BTRFS_BLOCK_GROUP_RAID6; + else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3) + allowed = BTRFS_BLOCK_GROUP_RAID1C3; else if (allowed & BTRFS_BLOCK_GROUP_RAID5) allowed = BTRFS_BLOCK_GROUP_RAID5; else if (allowed & BTRFS_BLOCK_GROUP_RAID10) allowed = BTRFS_BLOCK_GROUP_RAID10; else if (allowed & BTRFS_BLOCK_GROUP_RAID1) allowed = BTRFS_BLOCK_GROUP_RAID1; + else if (allowed & BTRFS_BLOCK_GROUP_DUP) + allowed = BTRFS_BLOCK_GROUP_DUP; else if (allowed & BTRFS_BLOCK_GROUP_RAID0) allowed = BTRFS_BLOCK_GROUP_RAID0; From 6819bb0b8552dcc5f82ca606c8911b8c67e0628f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Jun 2023 17:21:47 +0100 Subject: [PATCH 301/509] btrfs: fix race when deleting quota root from the dirty cow roots list commit b31cb5a6eb7a48b0a7bfdf06832b1fd5088d8c79 upstream. When disabling quotas we are deleting the quota root from the list fs_info->dirty_cowonly_roots without taking the lock that protects it, which is struct btrfs_fs_info::trans_lock. This unsynchronized list manipulation may cause chaos if there's another concurrent manipulation of this list, such as when adding a root to it with ctree.c:add_root_to_dirty_list(). This can result in all sorts of weird failures caused by a race, such as the following crash: [337571.278245] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] PREEMPT SMP PTI [337571.278933] CPU: 1 PID: 115447 Comm: btrfs Tainted: G W 6.4.0-rc6-btrfs-next-134+ #1 [337571.279153] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [337571.279572] RIP: 0010:commit_cowonly_roots+0x11f/0x250 [btrfs] [337571.279928] Code: 85 38 06 00 (...) [337571.280363] RSP: 0018:ffff9f63446efba0 EFLAGS: 00010206 [337571.280582] RAX: ffff942d98ec2638 RBX: ffff9430b82b4c30 RCX: 0000000449e1c000 [337571.280798] RDX: dead000000000100 RSI: ffff9430021e4900 RDI: 0000000000036070 [337571.281015] RBP: ffff942d98ec2000 R08: ffff942d98ec2000 R09: 000000000000015b [337571.281254] R10: 0000000000000009 R11: 0000000000000001 R12: ffff942fe8fbf600 [337571.281476] R13: ffff942dabe23040 R14: ffff942dabe20800 R15: ffff942d92cf3b48 [337571.281723] FS: 00007f478adb7340(0000) GS:ffff94349fa40000(0000) knlGS:0000000000000000 [337571.281950] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [337571.282184] CR2: 00007f478ab9a3d5 CR3: 000000001e02c001 CR4: 0000000000370ee0 [337571.282416] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [337571.282647] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [337571.282874] Call Trace: [337571.283101] [337571.283327] ? __die_body+0x1b/0x60 [337571.283570] ? die_addr+0x39/0x60 [337571.283796] ? exc_general_protection+0x22e/0x430 [337571.284022] ? asm_exc_general_protection+0x22/0x30 [337571.284251] ? commit_cowonly_roots+0x11f/0x250 [btrfs] [337571.284531] btrfs_commit_transaction+0x42e/0xf90 [btrfs] [337571.284803] ? _raw_spin_unlock+0x15/0x30 [337571.285031] ? release_extent_buffer+0x103/0x130 [btrfs] [337571.285305] reset_balance_state+0x152/0x1b0 [btrfs] [337571.285578] btrfs_balance+0xa50/0x11e0 [btrfs] [337571.285864] ? __kmem_cache_alloc_node+0x14a/0x410 [337571.286086] btrfs_ioctl+0x249a/0x3320 [btrfs] [337571.286358] ? mod_objcg_state+0xd2/0x360 [337571.286577] ? refill_obj_stock+0xb0/0x160 [337571.286798] ? seq_release+0x25/0x30 [337571.287016] ? __rseq_handle_notify_resume+0x3ba/0x4b0 [337571.287235] ? percpu_counter_add_batch+0x2e/0xa0 [337571.287455] ? __x64_sys_ioctl+0x88/0xc0 [337571.287675] __x64_sys_ioctl+0x88/0xc0 [337571.287901] do_syscall_64+0x38/0x90 [337571.288126] entry_SYSCALL_64_after_hwframe+0x72/0xdc [337571.288352] RIP: 0033:0x7f478aaffe9b So fix this by locking struct btrfs_fs_info::trans_lock before deleting the quota root from that list. Fixes: bed92eae26cc ("Btrfs: qgroup implementation and prototypes") CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3fc689154bb5..94edc74c8979 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1270,7 +1270,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) goto out; } + spin_lock(&fs_info->trans_lock); list_del("a_root->dirty_list); + spin_unlock(&fs_info->trans_lock); btrfs_tree_lock(quota_root->node); btrfs_clean_tree_block(quota_root->node); From 1dac8584be0c33cd209f5dc04fdc4ef1e45804a9 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Mon, 12 Jun 2023 11:05:32 +0200 Subject: [PATCH 302/509] ASoC: mediatek: mt8173: Fix irq error path commit f9c058d14f4fe23ef523a7ff73734d51c151683c upstream. After reordering the irq probe, the error path was not properly done. Lets fix it. Reported-by: Dan Carpenter Cc: stable@kernel.org Fixes: 4cbb264d4e91 ("ASoC: mediatek: mt8173: Enable IRQ when pdata is ready") Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230612-mt8173-fixup-v2-2-432aa99ce24d@chromium.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/mediatek/mt8173/mt8173-afe-pcm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c index 619d6733091c..cf8f66c2f289 100644 --- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c @@ -1072,6 +1072,10 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) afe->dev = &pdev->dev; + irq_id = platform_get_irq(pdev, 0); + if (irq_id <= 0) + return irq_id < 0 ? irq_id : -ENXIO; + afe->base_addr = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(afe->base_addr)) return PTR_ERR(afe->base_addr); @@ -1177,14 +1181,11 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) if (ret) goto err_cleanup_components; - irq_id = platform_get_irq(pdev, 0); - if (irq_id <= 0) - return irq_id < 0 ? irq_id : -ENXIO; ret = devm_request_irq(afe->dev, irq_id, mt8173_afe_irq_handler, 0, "Afe_ISR_Handle", (void *)afe); if (ret) { dev_err(afe->dev, "could not request_irq\n"); - goto err_pm_disable; + goto err_cleanup_components; } dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n"); From 9b0f7940e212134441fb783517ba33e0f03281f7 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Mon, 12 Jun 2023 11:05:31 +0200 Subject: [PATCH 303/509] ASoC: mediatek: mt8173: Fix snd_soc_component_initialize error path commit a46d37012a5be1737393b8f82fd35665e4556eee upstream. If the second component fails to initialize, cleanup the first on. Reported-by: Dan Carpenter Cc: stable@kernel.org Fixes: f1b5bf07365d ("ASoC: mt2701/mt8173: replace platform to component") Signed-off-by: Ricardo Ribalda Delgado Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230612-mt8173-fixup-v2-1-432aa99ce24d@chromium.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/mediatek/mt8173/mt8173-afe-pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c index cf8f66c2f289..3f6d9659ee95 100644 --- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c +++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c @@ -1162,14 +1162,14 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev) comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL); if (!comp_hdmi) { ret = -ENOMEM; - goto err_pm_disable; + goto err_cleanup_components; } ret = snd_soc_component_initialize(comp_hdmi, &mt8173_afe_hdmi_dai_component, &pdev->dev); if (ret) - goto err_pm_disable; + goto err_cleanup_components; #ifdef CONFIG_DEBUG_FS comp_hdmi->debugfs_prefix = "hdmi"; From 5d6fbb624576bdd29f8d95e7085abf9c5cb5c007 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 16 May 2023 17:31:05 +0200 Subject: [PATCH 304/509] ARM: orion5x: fix d2net gpio initialization commit f8ef1233939495c405a9faa4bd1ae7d3f581bae4 upstream. The DT version of this board has a custom file with the gpio device. However, it does nothing because the d2net_init() has no caller or prototype: arch/arm/mach-orion5x/board-d2net.c:101:13: error: no previous prototype for 'd2net_init' Call it from the board-dt file as intended. Fixes: 94b0bd366e36 ("ARM: orion5x: convert d2net to Device Tree") Reviewed-by: Andrew Lunn Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230516153109.514251-10-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-orion5x/board-dt.c | 3 +++ arch/arm/mach-orion5x/common.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm/mach-orion5x/board-dt.c b/arch/arm/mach-orion5x/board-dt.c index 3d36f1d95196..3f651df3a71c 100644 --- a/arch/arm/mach-orion5x/board-dt.c +++ b/arch/arm/mach-orion5x/board-dt.c @@ -63,6 +63,9 @@ static void __init orion5x_dt_init(void) if (of_machine_is_compatible("maxtor,shared-storage-2")) mss2_init(); + if (of_machine_is_compatible("lacie,d2-network")) + d2net_init(); + of_platform_default_populate(NULL, orion5x_auxdata_lookup, NULL); } diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index eb96009e21c4..b9cfdb456456 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -75,6 +75,12 @@ extern void mss2_init(void); static inline void mss2_init(void) {} #endif +#ifdef CONFIG_MACH_D2NET_DT +void d2net_init(void); +#else +static inline void d2net_init(void) {} +#endif + /***************************************************************************** * Helpers to access Orion registers ****************************************************************************/ From 66a0647cdc56b91ecc10712327aa503bfed57310 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 19 Apr 2023 23:07:39 +0200 Subject: [PATCH 305/509] leds: trigger: netdev: Recheck NETDEV_LED_MODE_LINKUP on dev rename commit cee4bd16c3195a701be683f7da9e88c6e11acb73 upstream. Dev can be renamed also while up for supported device. We currently wrongly clear the NETDEV_LED_MODE_LINKUP flag on NETDEV_CHANGENAME event. Fix this by rechecking if the carrier is ok on NETDEV_CHANGENAME and correctly set the NETDEV_LED_MODE_LINKUP bit. Fixes: 5f820ed52371 ("leds: trigger: netdev: fix handling on interface rename") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Christian Marangi Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230419210743.3594-2-ansuelsmth@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/leds/trigger/ledtrig-netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index d5e774d83021..f4d670ec30bc 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -318,6 +318,9 @@ static int netdev_trig_notify(struct notifier_block *nb, clear_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode); switch (evt) { case NETDEV_CHANGENAME: + if (netif_carrier_ok(dev)) + set_bit(NETDEV_LED_MODE_LINKUP, &trigger_data->mode); + fallthrough; case NETDEV_REGISTER: if (trigger_data->net_dev) dev_put(trigger_data->net_dev); From 5cb46b80ecdae8dc83ee957cad41238393eae326 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Jul 2023 16:49:11 +0200 Subject: [PATCH 306/509] fs: no need to check source commit 66d8fc0539b0d49941f313c9509a8384e4245ac1 upstream. The @source inode must be valid. It is even checked via IS_SWAPFILE() above making it pretty clear. So no need to check it when we unlock. What doesn't need to exist is the @target inode. The lock_two_inodes() helper currently swaps the @inode1 and @inode2 arguments if @inode1 is NULL to have consistent lock class usage. However, we know that at least for vfs_rename() that @inode1 is @source and thus is never NULL as per above. We also know that @source is a different inode than @target as that is checked right at the beginning of vfs_rename(). So we know that @source is valid and locked and that @target is locked. So drop the check whether @source is non-NULL. Fixes: 28eceeda130f ("fs: Lock moved directories") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202307030026.9sE2pk2x-lkp@intel.com Message-Id: <20230703-vfs-rename-source-v1-1-37eebb29b65b@kernel.org> [brauner: use commit message from patch I sent concurrently] Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index fa7e5511d7c8..bc5e633a5954 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4394,8 +4394,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, d_exchange(old_dentry, new_dentry); } out: - if (source) - inode_unlock(source); + inode_unlock(source); if (target) inode_unlock(target); dput(new_dentry); From 5bf73af8b3820e2cd381efb6231dca1b273ed238 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 29 Jun 2023 07:20:44 +0300 Subject: [PATCH 307/509] fanotify: disallow mount/sb marks on kernel internal pseudo fs commit 69562eb0bd3e6bb8e522a7b254334e0fb30dff0c upstream. Hopefully, nobody is trying to abuse mount/sb marks for watching all anonymous pipes/inodes. I cannot think of a good reason to allow this - it looks like an oversight that dated back to the original fanotify API. Link: https://lore.kernel.org/linux-fsdevel/20230628101132.kvchg544mczxv2pm@quack3/ Fixes: 0ff21db9fcc3 ("fanotify: hooks the fanotify_mark syscall to the vfsmount code") Signed-off-by: Amir Goldstein Reviewed-by: Christian Brauner Signed-off-by: Jan Kara Message-Id: <20230629042044.25723-1-amir73il@gmail.com> [backport to 5.x.y] Signed-off-by: Amir Goldstein Signed-off-by: Greg Kroah-Hartman --- fs/notify/fanotify/fanotify_user.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 18e014fa0648..84de9f97bbc0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1090,8 +1090,11 @@ static int fanotify_test_fid(struct path *path, __kernel_fsid_t *fsid) return 0; } -static int fanotify_events_supported(struct path *path, __u64 mask) +static int fanotify_events_supported(struct path *path, __u64 mask, + unsigned int flags) { + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1103,6 +1106,21 @@ static int fanotify_events_supported(struct path *path, __u64 mask) if (mask & FANOTIFY_PERM_EVENTS && path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) return -EINVAL; + + /* + * mount and sb marks are not allowed on kernel internal pseudo fs, + * like pipe_mnt, because that would subscribe to events on all the + * anonynous pipes in the system. + * + * SB_NOUSER covers all of the internal pseudo fs whose objects are not + * exposed to user's mount namespace, but there are other SB_KERNMOUNT + * fs, like nsfs, debugfs, for which the value of allowing sb and mount + * mark is questionable. For now we leave them alone. + */ + if (mark_type != FAN_MARK_INODE && + path->mnt->mnt_sb->s_flags & SB_NOUSER) + return -EINVAL; + return 0; } @@ -1218,7 +1236,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (flags & FAN_MARK_ADD) { - ret = fanotify_events_supported(&path, mask); + ret = fanotify_events_supported(&path, mask, flags); if (ret) goto path_put_and_out; } From 88bffb61bc037884c23a72359785d4854b23407b Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Thu, 24 Nov 2022 14:55:35 +0100 Subject: [PATCH 308/509] tpm, tpm_tis: Claim locality in interrupt handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0e069265bce5a40c4eee52e2364bbbd4dabee94a upstream. Writing the TPM_INT_STATUS register in the interrupt handler to clear the interrupts only has effect if a locality is held. Since this is not guaranteed at the time the interrupt is fired, claim the locality explicitly in the handler. Signed-off-by: Lino Sanfilippo Tested-by: Michael Niewƶhner Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 512c867495ea..365761055df3 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -731,7 +731,9 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id) wake_up_interruptible(&priv->int_queue); /* Clear interrupts handled with TPM_EOI */ + tpm_tis_request_locality(chip, 0); rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt); + tpm_tis_relinquish_locality(chip, 0); if (rc < 0) return IRQ_NONE; From f0aec6c403a0508165e8e8e4bc8213dc567af3a7 Mon Sep 17 00:00:00 2001 From: Gilad Reti Date: Wed, 13 Jan 2021 07:38:08 +0200 Subject: [PATCH 309/509] selftests/bpf: Add verifier test for PTR_TO_MEM spill commit 4237e9f4a96228ccc8a7abe5e4b30834323cd353 upstream. Add a test to check that the verifier is able to recognize spilling of PTR_TO_MEM registers, by reserving a ringbuf buffer, forcing the spill of a pointer holding the buffer address to the stack, filling it back in from the stack and writing to the memory area pointed by it. The patch was partially contributed by CyberArk Software, Inc. Signed-off-by: Gilad Reti Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210113053810.13518-2-gilad.reti@gmail.com Cc: Lorenz Bauer Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 12 +++++++- .../selftests/bpf/verifier/spill_fill.c | 30 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 0fb92d9a319b..961c17b4681e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 20 +#define MAX_NR_MAPS 21 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -87,6 +87,7 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; + int fixup_map_ringbuf[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -640,6 +641,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_sk_storage_map = test->fixup_sk_storage_map; int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; + int *fixup_map_ringbuf = test->fixup_map_ringbuf; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -817,6 +819,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_reuseport_array++; } while (*fixup_map_reuseport_array); } + if (*fixup_map_ringbuf) { + map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0, + 0, 4096); + do { + prog[*fixup_map_ringbuf].imm = map_fds[20]; + fixup_map_ringbuf++; + } while (*fixup_map_ringbuf); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 45d43bf82f26..0b943897aaf6 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -28,6 +28,36 @@ .result = ACCEPT, .result_unpriv = ACCEPT, }, +{ + "check valid spill/fill, ptr to mem", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = ACCEPT, + .result_unpriv = ACCEPT, +}, { "check corrupted spill/fill", .insns = { From 87410743b548f66e937e927a59f17de5a2c9fc4a Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 21 Jun 2023 08:17:25 +1200 Subject: [PATCH 310/509] block: add overflow checks for Amiga partition support commit b6f3f28f604ba3de4724ad82bea6adb1300c0b5f upstream. The Amiga partition parser module uses signed int for partition sector address and count, which will overflow for disks larger than 1 TB. Use u64 as type for sector address and size to allow using disks up to 2 TB without LBD support, and disks larger than 2 TB with LBD. The RBD format allows to specify disk sizes up to 2^128 bytes (though native OS limitations reduce this somewhat, to max 2^68 bytes), so check for u64 overflow carefully to protect against overflowing sector_t. Bail out if sector addresses overflow 32 bits on kernels without LBD support. This bug was reported originally in 2012, and the fix was created by the RDB author, Joanne Dow . A patch had been discussed and reviewed on linux-m68k at that time but never officially submitted (now resubmitted as patch 1 in this series). This patch adds additional error checking and warning messages. Reported-by: Martin Steigerwald Closes: https://bugzilla.kernel.org/show_bug.cgi?id=43511 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Message-ID: <201206192146.09327.Martin@lichtvoll.de> Cc: # 5.2 Signed-off-by: Michael Schmitz Reviewed-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230620201725.7020-4-schmitzmic@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/partitions/amiga.c | 103 ++++++++++++++++++++++++++++++++------- 1 file changed, 85 insertions(+), 18 deletions(-) diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 64f0844468aa..7abc09d8fc73 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -11,10 +11,18 @@ #define pr_fmt(fmt) fmt #include +#include +#include #include #include "check.h" +/* magic offsets in partition DosEnvVec */ +#define NR_HD 3 +#define NR_SECT 5 +#define LO_CYL 9 +#define HI_CYL 10 + static __inline__ u32 checksum_block(__be32 *m, int size) { @@ -31,9 +39,12 @@ int amiga_partition(struct parsed_partitions *state) unsigned char *data; struct RigidDiskBlock *rdb; struct PartitionBlock *pb; - sector_t start_sect, nr_sects; - int blk, part, res = 0; - int blksize = 1; /* Multiplier for disk block size */ + u64 start_sect, nr_sects; + sector_t blk, end_sect; + u32 cylblk; /* rdb_CylBlocks = nr_heads*sect_per_track */ + u32 nr_hd, nr_sect, lo_cyl, hi_cyl; + int part, res = 0; + unsigned int blksize = 1; /* Multiplier for disk block size */ int slot = 1; char b[BDEVNAME_SIZE]; @@ -42,7 +53,7 @@ int amiga_partition(struct parsed_partitions *state) goto rdb_done; data = read_part_sector(state, blk, §); if (!data) { - pr_err("Dev %s: unable to read RDB block %d\n", + pr_err("Dev %s: unable to read RDB block %llu\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; @@ -59,12 +70,12 @@ int amiga_partition(struct parsed_partitions *state) *(__be32 *)(data+0xdc) = 0; if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { - pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n", + pr_err("Trashed word at 0xd0 in block %llu ignored in checksum calculation\n", blk); break; } - pr_err("Dev %s: RDB in block %d has bad checksum\n", + pr_err("Dev %s: RDB in block %llu has bad checksum\n", bdevname(state->bdev, b), blk); } @@ -81,10 +92,15 @@ int amiga_partition(struct parsed_partitions *state) blk = be32_to_cpu(rdb->rdb_PartitionList); put_dev_sector(sect); for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { - blk *= blksize; /* Read in terms partition table understands */ + /* Read in terms partition table understands */ + if (check_mul_overflow(blk, (sector_t) blksize, &blk)) { + pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n", + bdevname(state->bdev, b), blk, part); + break; + } data = read_part_sector(state, blk, §); if (!data) { - pr_err("Dev %s: unable to read partition block %d\n", + pr_err("Dev %s: unable to read partition block %llu\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; @@ -96,19 +112,70 @@ int amiga_partition(struct parsed_partitions *state) if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 ) continue; - /* Tell Kernel about it */ + /* RDB gives us more than enough rope to hang ourselves with, + * many times over (2^128 bytes if all fields max out). + * Some careful checks are in order, so check for potential + * overflows. + * We are multiplying four 32 bit numbers to one sector_t! + */ + + nr_hd = be32_to_cpu(pb->pb_Environment[NR_HD]); + nr_sect = be32_to_cpu(pb->pb_Environment[NR_SECT]); + + /* CylBlocks is total number of blocks per cylinder */ + if (check_mul_overflow(nr_hd, nr_sect, &cylblk)) { + pr_err("Dev %s: heads*sects %u overflows u32, skipping partition!\n", + bdevname(state->bdev, b), cylblk); + continue; + } + + /* check for consistency with RDB defined CylBlocks */ + if (cylblk > be32_to_cpu(rdb->rdb_CylBlocks)) { + pr_warn("Dev %s: cylblk %u > rdb_CylBlocks %u!\n", + bdevname(state->bdev, b), cylblk, + be32_to_cpu(rdb->rdb_CylBlocks)); + } + + /* RDB allows for variable logical block size - + * normalize to 512 byte blocks and check result. + */ + + if (check_mul_overflow(cylblk, blksize, &cylblk)) { + pr_err("Dev %s: partition %u bytes per cyl. overflows u32, skipping partition!\n", + bdevname(state->bdev, b), part); + continue; + } + + /* Calculate partition start and end. Limit of 32 bit on cylblk + * guarantees no overflow occurs if LBD support is enabled. + */ + + lo_cyl = be32_to_cpu(pb->pb_Environment[LO_CYL]); + start_sect = ((u64) lo_cyl * cylblk); + + hi_cyl = be32_to_cpu(pb->pb_Environment[HI_CYL]); + nr_sects = (((u64) hi_cyl - lo_cyl + 1) * cylblk); - nr_sects = ((sector_t)be32_to_cpu(pb->pb_Environment[10]) + 1 - - be32_to_cpu(pb->pb_Environment[9])) * - be32_to_cpu(pb->pb_Environment[3]) * - be32_to_cpu(pb->pb_Environment[5]) * - blksize; if (!nr_sects) continue; - start_sect = (sector_t)be32_to_cpu(pb->pb_Environment[9]) * - be32_to_cpu(pb->pb_Environment[3]) * - be32_to_cpu(pb->pb_Environment[5]) * - blksize; + + /* Warn user if partition end overflows u32 (AmigaDOS limit) */ + + if ((start_sect + nr_sects) > UINT_MAX) { + pr_warn("Dev %s: partition %u (%llu-%llu) needs 64 bit device support!\n", + bdevname(state->bdev, b), part, + start_sect, start_sect + nr_sects); + } + + if (check_add_overflow(start_sect, nr_sects, &end_sect)) { + pr_err("Dev %s: partition %u (%llu-%llu) needs LBD device support, skipping partition!\n", + bdevname(state->bdev, b), part, + start_sect, end_sect); + continue; + } + + /* Tell Kernel about it */ + put_partition(state,slot++,start_sect,nr_sects); { /* Be even more informative to aid mounting */ From 01248dd65155baf43721a15995d53f88cabfbca2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Sep 2021 15:50:04 +0200 Subject: [PATCH 311/509] sh: pgtable-3level: Fix cast to pointer from integer of different size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8518e694203d0bfd202ea4a80356785b6992322e upstream. If X2TLB=y (CPU_SHX2=y or CPU_SHX3=y, e.g. migor_defconfig), pgd_t.pgd is "unsigned long long", causing: In file included from arch/sh/include/asm/pgtable.h:13, from include/linux/pgtable.h:6, from include/linux/mm.h:33, from arch/sh/kernel/asm-offsets.c:14: arch/sh/include/asm/pgtable-3level.h: In function ā€˜pud_pgtableā€™: arch/sh/include/asm/pgtable-3level.h:37:9: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] 37 | return (pmd_t *)pud_val(pud); | ^ Fix this by adding an intermediate cast to "unsigned long", which is basically what the old code did before. Fixes: 9cf6fa2458443118 ("mm: rename pud_page_vaddr to pud_pgtable and make it return pmd_t *") Signed-off-by: Geert Uytterhoeven Tested-by: Daniel Palmer Acked-by: Rob Landley Tested-by: John Paul Adrian Glaubitz Signed-off-by: Rich Felker Signed-off-by: Greg Kroah-Hartman --- arch/sh/include/asm/pgtable-3level.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 56bf35c2f29c..cdced80a7ffa 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -34,7 +34,7 @@ typedef struct { unsigned long long pmd; } pmd_t; static inline pmd_t *pud_pgtable(pud_t pud) { - return (pmd_t *)pud_val(pud); + return (pmd_t *)(unsigned long)pud_val(pud); } /* only used by the stubbed out hugetlb gup code, should never be called */ From 3f51f1157f67e9f16602db2694492a3b0e9aec2e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Jul 2023 10:48:49 +0200 Subject: [PATCH 312/509] netfilter: nf_tables: use net_generic infra for transaction data [ Upstream commit 0854db2aaef3fcdd3498a9d299c60adea2aa3dc6 ] This moves all nf_tables pernet data from struct net to a net_generic extension, with the exception of the gencursor. The latter is used in the data path and also outside of the nf_tables core. All others are only used from the configuration plane. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 10 + include/net/netns/nftables.h | 7 - net/netfilter/nf_tables_api.c | 382 ++++++++++++++++++------------ net/netfilter/nf_tables_offload.c | 30 ++- net/netfilter/nft_chain_filter.c | 11 +- net/netfilter/nft_dynset.c | 6 +- 6 files changed, 279 insertions(+), 167 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 030237f3d82a..5e184c20bc55 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1535,4 +1535,14 @@ void nf_tables_trans_destroy_flush_work(void); int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); __be64 nf_jiffies64_to_msecs(u64 input); +struct nftables_pernet { + struct list_head tables; + struct list_head commit_list; + struct list_head module_list; + struct list_head notify_list; + struct mutex commit_mutex; + unsigned int base_seq; + u8 validate_state; +}; + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 6c0806bd8d1e..8c77832d0240 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -5,14 +5,7 @@ #include struct netns_nftables { - struct list_head tables; - struct list_head commit_list; - struct list_head module_list; - struct list_head notify_list; - struct mutex commit_mutex; - unsigned int base_seq; u8 gencursor; - u8 validate_state; }; #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e59cad1f7a36..e3865f29ae83 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -21,10 +21,13 @@ #include #include #include +#include #include #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) +unsigned int nf_tables_net_id __read_mostly; + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -103,7 +106,9 @@ static const u8 nft2audit_op[NFT_MSG_MAX] = { // enum nf_tables_msg_types static void nft_validate_state_update(struct net *net, u8 new_validate_state) { - switch (net->nft.validate_state) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); break; @@ -114,7 +119,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state) return; } - net->nft.validate_state = new_validate_state; + nft_net->validate_state = new_validate_state; } static void nf_tables_trans_destroy_work(struct work_struct *w); static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); @@ -170,13 +175,15 @@ static void nft_trans_destroy(struct nft_trans *trans) static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; struct nft_trans *trans; if (!nft_set_is_anonymous(set)) return; - list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) @@ -270,6 +277,14 @@ static void nf_tables_unregister_hook(struct net *net, nf_unregister_net_hook(net, &basechain->ops); } +static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) +{ + struct nftables_pernet *nft_net; + + nft_net = net_generic(net, nf_tables_net_id); + list_add_tail(&trans->list, &nft_net->commit_list); +} + static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -281,7 +296,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) if (msg_type == NFT_MSG_NEWTABLE) nft_activate_next(ctx->net, ctx->table); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -314,7 +329,7 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) } } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -387,7 +402,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return trans; } @@ -453,7 +468,7 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -485,7 +500,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type, nft_activate_next(ctx->net, obj); nft_trans_obj(trans) = obj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -519,7 +534,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); nft_trans_flowtable(trans) = flowtable; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } @@ -547,13 +562,15 @@ static struct nft_table *nft_table_lookup(const struct net *net, const struct nlattr *nla, u8 family, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry_rcu(table, &net->nft.tables, list, - lockdep_is_held(&net->nft.commit_mutex)) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list, + lockdep_is_held(&nft_net->commit_mutex)) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) @@ -567,9 +584,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net; struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(table, &nft_net->tables, list) { if (be64_to_cpu(nla_get_be64(nla)) == table->handle && nft_active_genmask(table, genmask)) return table; @@ -621,6 +640,7 @@ struct nft_module_request { static int nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + struct nftables_pernet *nft_net; struct nft_module_request *req; va_list args; int ret; @@ -631,7 +651,8 @@ static int nft_request_module(struct net *net, const char *fmt, ...) if (ret >= MODULE_NAME_LEN) return 0; - list_for_each_entry(req, &net->nft.module_list, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry(req, &nft_net->module_list, list) { if (!strcmp(req->module, module_name)) { if (req->done) return 0; @@ -647,7 +668,7 @@ static int nft_request_module(struct net *net, const char *fmt, ...) req->done = false; strlcpy(req->module, module_name, MODULE_NAME_LEN); - list_add_tail(&req->list, &net->nft.module_list); + list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; } @@ -685,7 +706,9 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, static __be16 nft_base_seq(const struct net *net) { - return htons(net->nft.base_seq & 0xffff); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return htons(nft_net->base_seq & 0xffff); } static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { @@ -743,6 +766,7 @@ static void nft_notify_enqueue(struct sk_buff *skb, bool report, static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; @@ -761,7 +785,8 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -771,15 +796,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nftables_pernet *nft_net; const struct nft_table *table; unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -954,7 +981,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) goto err; nft_trans_table_update(trans) = true; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: nft_trans_destroy(trans); @@ -1017,6 +1044,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -1026,7 +1054,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, u32 flags = 0; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask); if (IS_ERR(table)) { @@ -1084,7 +1112,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (err < 0) goto err_trans; - list_add_tail_rcu(&table->list, &net->nft.tables); + list_add_tail_rcu(&table->list, &nft_net->tables); return 0; err_trans: rhltable_destroy(&table->chains_ht); @@ -1172,11 +1200,12 @@ static int nft_flush_table(struct nft_ctx *ctx) static int nft_flush(struct nft_ctx *ctx, int family) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_table *table, *nt; const struct nlattr * const *nla = ctx->nla; int err = 0; - list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) { + list_for_each_entry_safe(table, nt, &nft_net->tables, list) { if (family != AF_UNSPEC && table->family != family) continue; @@ -1291,7 +1320,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask) static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING - return lockdep_is_held(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + return lockdep_is_held(&nft_net->commit_mutex); #else return true; #endif @@ -1494,6 +1525,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) { + struct nftables_pernet *nft_net; struct sk_buff *skb; int err; @@ -1513,7 +1545,8 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(ctx->net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -1528,11 +1561,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -1847,11 +1882,12 @@ static int nft_chain_parse_hook(struct net *net, struct nft_chain_hook *hook, u8 family, bool autoload) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlattr *ha[NFTA_HOOK_MAX + 1]; const struct nft_chain_type *type; int err; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); err = nla_parse_nested_deprecated(ha, NFTA_HOOK_MAX, @@ -2244,6 +2280,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, if (nla[NFTA_CHAIN_HANDLE] && nla[NFTA_CHAIN_NAME]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_trans *tmp; char *name; @@ -2253,7 +2290,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, goto err; err = -EEXIST; - list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) { + list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && tmp->ctx.table == table && nft_trans_chain_update(tmp) && @@ -2267,7 +2304,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, nft_trans_chain_name(trans) = name; } - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err: @@ -2280,10 +2317,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWCHAIN && @@ -2299,6 +2337,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; @@ -2310,7 +2349,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, u64 handle = 0; u32 flags = 0; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); if (IS_ERR(table)) { @@ -2848,6 +2887,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, static void nf_tables_rule_notify(const struct nft_ctx *ctx, const struct nft_rule *rule, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; @@ -2867,7 +2907,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -2925,11 +2965,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -3161,6 +3203,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); const struct nfgenmsg *nfmsg = nlmsg_data(nlh); u8 genmask = nft_genmask_next(net); struct nft_expr_info *info = NULL; @@ -3178,7 +3221,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, int err, rem; u64 handle, pos_handle; - lockdep_assert_held(&net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); if (IS_ERR(table)) { @@ -3351,7 +3394,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, kvfree(info); chain->use++; - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { @@ -3381,10 +3424,11 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, const struct nft_chain *chain, const struct nlattr *nla) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); struct nft_trans *trans; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && @@ -3497,13 +3541,14 @@ nft_select_set_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, enum nft_set_policies policy) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); const struct nft_set_ops *ops, *bops; struct nft_set_estimate est, best; const struct nft_set_type *type; u32 flags = 0; int i; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); lockdep_nfnl_nft_mutex_not_held(); if (nla[NFTA_SET_FLAGS] != NULL) @@ -3641,10 +3686,11 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, const struct nft_table *table, const struct nlattr *nla, u8 genmask) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET) { struct nft_set *set = nft_trans_set(trans); @@ -3867,6 +3913,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, const struct nft_set *set, int event, gfp_t gfp_flags) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; u32 portid = ctx->portid; int err; @@ -3885,7 +3932,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -3898,14 +3945,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; struct net *net = sock_net(skb->sk); struct nft_ctx *ctx = cb->data, ctx_set; + struct nftables_pernet *nft_net; if (cb->args[1]) return skb->len; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (ctx->family != NFPROTO_UNSPEC && ctx->family != table->family) continue; @@ -4706,6 +4755,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct nft_set_dump_ctx *dump_ctx = cb->data; struct net *net = sock_net(skb->sk); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_set *set; struct nft_set_dump_args args; @@ -4716,7 +4766,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) int event; rcu_read_lock(); - list_for_each_entry_rcu(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (dump_ctx->ctx.family != NFPROTO_UNSPEC && dump_ctx->ctx.family != table->family) continue; @@ -4995,6 +5046,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set_elem *elem, int event, u16 flags) { + struct nftables_pernet *nft_net; struct net *net = ctx->net; u32 portid = ctx->portid; struct sk_buff *skb; @@ -5014,7 +5066,8 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_net = net_generic(net, nf_tables_net_id); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -5410,7 +5463,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_set_full: @@ -5441,6 +5494,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, const struct nlattr * const nla[], struct netlink_ext_ack *extack) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; @@ -5470,7 +5524,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, return err; } - if (net->nft.validate_state == NFT_VALIDATE_DO) + if (nft_net->validate_state == NFT_VALIDATE_DO) return nft_table_validate(net, ctx.table); return 0; @@ -5606,7 +5660,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, nft_set_elem_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; fail_ops: @@ -5640,7 +5694,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, nft_set_elem_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err1: @@ -5939,7 +5993,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, nft_trans_obj(trans) = obj; nft_trans_obj_update(trans) = true; nft_trans_obj_newobj(trans) = newobj; - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -6102,6 +6156,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) struct nft_obj_filter *filter = cb->data; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct nftables_pernet *nft_net; struct nft_object *obj; bool reset = false; @@ -6109,9 +6164,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) reset = true; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -6134,7 +6190,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, - net->nft.base_seq); + nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6255,8 +6311,11 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk, reset = true; if (reset) { - char *buf = kasprintf(GFP_ATOMIC, "%s:%u", - table->name, net->nft.base_seq); + const struct nftables_pernet *nft_net; + char *buf; + + nft_net = net_generic(net, nf_tables_net_id); + buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6341,10 +6400,11 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *skb; int err; char *buf = kasprintf(gfp, "%s:%u", - table->name, net->nft.base_seq); + table->name, nft_net->base_seq); audit_log_nfcfg(buf, family, @@ -6370,7 +6430,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, goto err; } - nft_notify_enqueue(skb, report, &net->nft.notify_list); + nft_notify_enqueue(skb, report, &nft_net->notify_list); return; err: nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -6706,7 +6766,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans)); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -6896,7 +6956,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx, list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans)); nft_flowtable_hook_release(&flowtable_hook); - list_add_tail(&trans->list, &ctx->net->nft.commit_list); + nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7022,12 +7082,14 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; const struct nft_table *table; rcu_read_lock(); - cb->seq = net->nft.base_seq; + nft_net = net_generic(net, nf_tables_net_id); + cb->seq = nft_net->base_seq; - list_for_each_entry_rcu(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &nft_net->tables, list) { if (family != NFPROTO_UNSPEC && family != table->family) continue; @@ -7162,6 +7224,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, struct list_head *hook_list, int event) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct sk_buff *skb; int err; @@ -7181,7 +7244,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, goto err; } - nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list); + nft_notify_enqueue(skb, ctx->report, &nft_net->notify_list); return; err: nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); @@ -7206,6 +7269,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable) static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nlmsghdr *nlh; char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); @@ -7215,7 +7279,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (!nlh) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) || nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; @@ -7250,6 +7314,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_flowtable *flowtable; + struct nftables_pernet *nft_net; struct nft_table *table; struct net *net; @@ -7257,13 +7322,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this, return 0; net = dev_net(dev); - mutex_lock(&net->nft.commit_mutex); - list_for_each_entry(table, &net->nft.tables, list) { + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { nft_flowtable_event(event, dev, flowtable); } } - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } @@ -7444,16 +7510,17 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { static int nf_tables_validate(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; - switch (net->nft.validate_state) { + switch (nft_net->validate_state) { case NFT_VALIDATE_SKIP: break; case NFT_VALIDATE_NEED: nft_validate_state_update(net, NFT_VALIDATE_DO); fallthrough; case NFT_VALIDATE_DO: - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (nft_table_validate(net, table) < 0) return -EAGAIN; } @@ -7630,9 +7697,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha static void nf_tables_commit_chain_prepare_cancel(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWRULE || @@ -7730,10 +7798,11 @@ void nft_chain_del(struct nft_chain *chain) static void nf_tables_module_autoload_cleanup(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; - WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); - list_for_each_entry_safe(req, next, &net->nft.module_list, list) { + WARN_ON_ONCE(!list_empty(&nft_net->commit_list)); + list_for_each_entry_safe(req, next, &nft_net->module_list, list) { WARN_ON_ONCE(!req->done); list_del(&req->list); kfree(req); @@ -7742,6 +7811,7 @@ static void nf_tables_module_autoload_cleanup(struct net *net) static void nf_tables_commit_release(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; /* all side effects have to be made visible. @@ -7751,35 +7821,36 @@ static void nf_tables_commit_release(struct net *net) * Memory reclaim happens asynchronously from work queue * to prevent expensive synchronize_rcu() in commit phase. */ - if (list_empty(&net->nft.commit_list)) { + if (list_empty(&nft_net->commit_list)) { nf_tables_module_autoload_cleanup(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return; } - trans = list_last_entry(&net->nft.commit_list, + trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); get_net(trans->ctx.net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; spin_lock(&nf_tables_destroy_list_lock); - list_splice_tail_init(&net->nft.commit_list, &nf_tables_destroy_list); + list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); spin_unlock(&nf_tables_destroy_list_lock); nf_tables_module_autoload_cleanup(net); schedule_work(&trans_destroy_work); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static void nft_commit_notify(struct net *net, u32 portid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct sk_buff *batch_skb = NULL, *nskb, *skb; unsigned char *data; int len; - list_for_each_entry_safe(skb, nskb, &net->nft.notify_list, list) { + list_for_each_entry_safe(skb, nskb, &nft_net->notify_list, list) { if (!batch_skb) { new_batch: batch_skb = skb; @@ -7805,7 +7876,7 @@ static void nft_commit_notify(struct net *net, u32 portid) NFT_CB(batch_skb).report, GFP_KERNEL); } - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } static int nf_tables_commit_audit_alloc(struct list_head *adl, @@ -7871,6 +7942,7 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; struct nft_chain *chain; @@ -7878,8 +7950,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) LIST_HEAD(adl); int err; - if (list_empty(&net->nft.commit_list)) { - mutex_unlock(&net->nft.commit_mutex); + if (list_empty(&nft_net->commit_list)) { + mutex_unlock(&nft_net->commit_mutex); return 0; } @@ -7892,7 +7964,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return err; /* 1. Allocate space for next generation rules_gen_X[] */ - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { int ret; ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); @@ -7915,7 +7987,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } /* step 2. Make rules_gen_X visible to packet path */ - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_commit_chain(net, chain); } @@ -7924,12 +7996,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) * Bump generation counter, invalidate any dump in progress. * Cannot fail after this point. */ - while (++net->nft.base_seq == 0); + while (++nft_net->base_seq == 0) + ; /* step 3. Start new generation, rules_gen_X now in use. */ net->nft.gencursor = nft_gencursor_next(net); - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { nf_tables_commit_audit_collect(&adl, trans->ctx.table, trans->msg_type); switch (trans->msg_type) { @@ -8089,7 +8162,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_commit_notify(net, NETLINK_CB(skb).portid); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); - nf_tables_commit_audit_log(&adl, net->nft.base_seq); + nf_tables_commit_audit_log(&adl, nft_net->base_seq); nf_tables_commit_release(net); return 0; @@ -8097,17 +8170,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) static void nf_tables_module_autoload(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_module_request *req, *next; LIST_HEAD(module_list); - list_splice_init(&net->nft.module_list, &module_list); - mutex_unlock(&net->nft.commit_mutex); + list_splice_init(&nft_net->module_list, &module_list); + mutex_unlock(&nft_net->commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { request_module("%s", req->module); req->done = true; } - mutex_lock(&net->nft.commit_mutex); - list_splice(&module_list, &net->nft.module_list); + mutex_lock(&nft_net->commit_mutex); + list_splice(&module_list, &nft_net->module_list); } static void nf_tables_abort_release(struct nft_trans *trans) @@ -8144,6 +8218,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans, *next; struct nft_trans_elem *te; @@ -8151,7 +8226,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nf_tables_validate(net) < 0) return -EAGAIN; - list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list, + list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { switch (trans->msg_type) { case NFT_MSG_NEWTABLE: @@ -8277,7 +8352,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) synchronize_rcu(); list_for_each_entry_safe_reverse(trans, next, - &net->nft.commit_list, list) { + &nft_net->commit_list, list) { list_del(&trans->list); nf_tables_abort_release(trans); } @@ -8293,22 +8368,24 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) static int nf_tables_abort(struct net *net, struct sk_buff *skb, enum nfnl_abort_action action) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); int ret = __nf_tables_abort(net, action); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return ret; } static bool nf_tables_valid_genid(struct net *net, u32 genid) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); bool genid_ok; - mutex_lock(&net->nft.commit_mutex); + mutex_lock(&nft_net->commit_mutex); - genid_ok = genid == 0 || net->nft.base_seq == genid; + genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); /* else, commit mutex has to be released by commit or abort function */ return genid_ok; @@ -8909,19 +8986,19 @@ EXPORT_SYMBOL_GPL(__nft_release_basechain); static void __nft_release_hooks(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_table *table; struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { list_for_each_entry(chain, &table->chains, list) nf_tables_unregister_hook(net, table, chain); } } -static void __nft_release_tables(struct net *net) +static void __nft_release_table(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable, *nf; - struct nft_table *table, *nt; struct nft_chain *chain, *nc; struct nft_object *obj, *ne; struct nft_rule *rule, *nr; @@ -8931,79 +9008,94 @@ static void __nft_release_tables(struct net *net) .family = NFPROTO_NETDEV, }; - list_for_each_entry_safe(table, nt, &net->nft.tables, list) { - ctx.family = table->family; - ctx.table = table; - list_for_each_entry(chain, &table->chains, list) { - ctx.chain = chain; - list_for_each_entry_safe(rule, nr, &chain->rules, list) { - list_del(&rule->list); - chain->use--; - nf_tables_rule_release(&ctx, rule); - } + ctx.family = table->family; + ctx.table = table; + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + list_for_each_entry_safe(rule, nr, &chain->rules, list) { + list_del(&rule->list); + chain->use--; + nf_tables_rule_release(&ctx, rule); } - list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { - list_del(&flowtable->list); - table->use--; - nf_tables_flowtable_destroy(flowtable); - } - list_for_each_entry_safe(set, ns, &table->sets, list) { - list_del(&set->list); - table->use--; - nft_set_destroy(&ctx, set); - } - list_for_each_entry_safe(obj, ne, &table->objects, list) { - nft_obj_del(obj); - table->use--; - nft_obj_destroy(&ctx, obj); - } - list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; - nft_chain_del(chain); - table->use--; - nf_tables_chain_destroy(&ctx); - } - list_del(&table->list); - nf_tables_table_destroy(&ctx); } + list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { + list_del(&flowtable->list); + table->use--; + nf_tables_flowtable_destroy(flowtable); + } + list_for_each_entry_safe(set, ns, &table->sets, list) { + list_del(&set->list); + table->use--; + nft_set_destroy(&ctx, set); + } + list_for_each_entry_safe(obj, ne, &table->objects, list) { + nft_obj_del(obj); + table->use--; + nft_obj_destroy(&ctx, obj); + } + list_for_each_entry_safe(chain, nc, &table->chains, list) { + ctx.chain = chain; + nft_chain_del(chain); + table->use--; + nf_tables_chain_destroy(&ctx); + } + list_del(&table->list); + nf_tables_table_destroy(&ctx); +} + +static void __nft_release_tables(struct net *net) +{ + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + struct nft_table *table, *nt; + + list_for_each_entry_safe(table, nt, &nft_net->tables, list) + __nft_release_table(net, table); } static int __net_init nf_tables_init_net(struct net *net) { - INIT_LIST_HEAD(&net->nft.tables); - INIT_LIST_HEAD(&net->nft.commit_list); - INIT_LIST_HEAD(&net->nft.module_list); - INIT_LIST_HEAD(&net->nft.notify_list); - mutex_init(&net->nft.commit_mutex); - net->nft.base_seq = 1; - net->nft.validate_state = NFT_VALIDATE_SKIP; + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + INIT_LIST_HEAD(&nft_net->tables); + INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->module_list); + INIT_LIST_HEAD(&nft_net->notify_list); + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->validate_state = NFT_VALIDATE_SKIP; return 0; } static void __net_exit nf_tables_pre_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); __nft_release_hooks(net); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static void __net_exit nf_tables_exit_net(struct net *net) { - mutex_lock(&net->nft.commit_mutex); - if (!list_empty(&net->nft.commit_list)) + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); + + mutex_lock(&nft_net->commit_mutex); + if (!list_empty(&nft_net->commit_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); __nft_release_tables(net); - mutex_unlock(&net->nft.commit_mutex); - WARN_ON_ONCE(!list_empty(&net->nft.tables)); - WARN_ON_ONCE(!list_empty(&net->nft.module_list)); - WARN_ON_ONCE(!list_empty(&net->nft.notify_list)); + mutex_unlock(&nft_net->commit_mutex); + WARN_ON_ONCE(!list_empty(&nft_net->tables)); + WARN_ON_ONCE(!list_empty(&nft_net->module_list)); + WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); } static struct pernet_operations nf_tables_net_ops = { .init = nf_tables_init_net, .pre_exit = nf_tables_pre_exit_net, .exit = nf_tables_exit_net, + .id = &nf_tables_net_id, + .size = sizeof(struct nftables_pernet), }; static int __init nf_tables_module_init(void) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 4e99b1731b3f..5cfbb29d8a34 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -7,6 +7,8 @@ #include #include +extern unsigned int nf_tables_net_id; + static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) { struct nft_flow_rule *flow; @@ -371,16 +373,18 @@ static void nft_indr_block_cleanup(struct flow_block_cb *block_cb) struct nft_base_chain *basechain = block_cb->indr.data; struct net_device *dev = block_cb->indr.dev; struct netlink_ext_ack extack = {}; + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct flow_block_offload bo; nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND, basechain, &extack); - mutex_lock(&net->nft.commit_mutex); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); list_del(&block_cb->driver_list); list_move(&block_cb->list, &bo.cb_list); nft_flow_offload_unbind(&bo, basechain); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); } static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, @@ -476,9 +480,10 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy, static void nft_flow_rule_offload_abort(struct net *net, struct nft_trans *trans) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); int err = 0; - list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) { + list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -524,11 +529,12 @@ static void nft_flow_rule_offload_abort(struct net *net, int nft_flow_rule_offload_commit(struct net *net) { + struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); struct nft_trans *trans; int err = 0; u8 policy; - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -580,7 +586,7 @@ int nft_flow_rule_offload_commit(struct net *net) } } - list_for_each_entry(trans, &net->nft.commit_list, list) { + list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) continue; @@ -600,15 +606,15 @@ int nft_flow_rule_offload_commit(struct net *net) return err; } -static struct nft_chain *__nft_offload_get_chain(struct net_device *dev) +static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *nft_net, + struct net_device *dev) { struct nft_base_chain *basechain; - struct net *net = dev_net(dev); struct nft_hook *hook, *found; const struct nft_table *table; struct nft_chain *chain; - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -640,19 +646,21 @@ static int nft_offload_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct net *net = dev_net(dev); struct nft_chain *chain; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - mutex_lock(&net->nft.commit_mutex); - chain = __nft_offload_get_chain(dev); + nft_net = net_generic(net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + chain = __nft_offload_get_chain(nft_net, dev); if (chain) nft_flow_block_chain(nft_base_chain(chain), dev, FLOW_BLOCK_UNBIND); - mutex_unlock(&net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index ff8528ad3dc6..7a9aa57b195b 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -10,6 +11,8 @@ #include #include +extern unsigned int nf_tables_net_id; + #ifdef CONFIG_NF_TABLES_IPV4 static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, @@ -355,6 +358,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct nftables_pernet *nft_net; struct nft_table *table; struct nft_chain *chain, *nr; struct nft_ctx ctx = { @@ -365,8 +369,9 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - mutex_lock(&ctx.net->nft.commit_mutex); - list_for_each_entry(table, &ctx.net->nft.tables, list) { + nft_net = net_generic(ctx.net, nf_tables_net_id); + mutex_lock(&nft_net->commit_mutex); + list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV) continue; @@ -380,7 +385,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, nft_netdev_event(event, dev, &ctx); } } - mutex_unlock(&ctx.net->nft.commit_mutex); + mutex_unlock(&nft_net->commit_mutex); return NOTIFY_DONE; } diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 038588d4d80e..8d47782b778f 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -11,6 +11,9 @@ #include #include #include +#include + +extern unsigned int nf_tables_net_id; struct nft_dynset { struct nft_set *set; @@ -106,13 +109,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id); struct nft_dynset *priv = nft_expr_priv(expr); u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; - lockdep_assert_held(&ctx->net->nft.commit_mutex); + lockdep_assert_held(&nft_net->commit_mutex); if (tb[NFTA_DYNSET_SET_NAME] == NULL || tb[NFTA_DYNSET_OP] == NULL || From e546e6ebb19d89a79d0569e968fc788879f86e5d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Jul 2023 10:48:50 +0200 Subject: [PATCH 313/509] netfilter: nf_tables: add rescheduling points during loop detection walks [ Upstream commit 81ea010667417ef3f218dfd99b69769fe66c2b67 ] Add explicit rescheduling points during ruleset walk. Switching to a faster algorithm is possible but this is a much smaller change, suitable for nf tree. Link: https://bugzilla.netfilter.org/show_bug.cgi?id=1460 Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e3865f29ae83..3c9292d3f826 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3164,6 +3164,8 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (err < 0) return err; } + + cond_resched(); } return 0; @@ -8506,9 +8508,13 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, break; } } + + cond_resched(); } list_for_each_entry(set, &ctx->table->sets, list) { + cond_resched(); + if (!nft_is_active_next(ctx->net, set)) continue; if (!(set->flags & NFT_SET_MAP) || From 8180fc2fadd48dde4966f2db2c716c2ce7510d0b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:51 +0200 Subject: [PATCH 314/509] netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE [ Upstream commit 1240eb93f0616b21c675416516ff3d74798fdc97 ] In case of error when adding a new rule that refers to an anonymous set, deactivate expressions via NFT_TRANS_PREPARE state, not NFT_TRANS_RELEASE. Thus, the lookup expression marks anonymous sets as inactive in the next generation to ensure it is not reachable in this transaction anymore and decrement the set refcount as introduced by c1592a89942e ("netfilter: nf_tables: deactivate anonymous set from preparation phase"). The abort step takes care of undoing the anonymous set. This is also consistent with rule deletion, where NFT_TRANS_PREPARE is used. Note that this error path is exercised in the preparation step of the commit protocol. This patch replaces nf_tables_rule_release() by the deactivate and destroy calls, this time with NFT_TRANS_PREPARE. Due to this incorrect error handling, it is possible to access a dangling pointer to the anonymous set that remains in the transaction list. [1009.379054] BUG: KASAN: use-after-free in nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379106] Read of size 8 at addr ffff88816c4c8020 by task nft-rule-add/137110 [1009.379116] CPU: 7 PID: 137110 Comm: nft-rule-add Not tainted 6.4.0-rc4+ #256 [1009.379128] Call Trace: [1009.379132] [1009.379135] dump_stack_lvl+0x33/0x50 [1009.379146] ? nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379191] print_address_description.constprop.0+0x27/0x300 [1009.379201] kasan_report+0x107/0x120 [1009.379210] ? nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379255] nft_set_lookup_global+0x147/0x1a0 [nf_tables] [1009.379302] nft_lookup_init+0xa5/0x270 [nf_tables] [1009.379350] nf_tables_newrule+0x698/0xe50 [nf_tables] [1009.379397] ? nf_tables_rule_release+0xe0/0xe0 [nf_tables] [1009.379441] ? kasan_unpoison+0x23/0x50 [1009.379450] nfnetlink_rcv_batch+0x97c/0xd90 [nfnetlink] [1009.379470] ? nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] [1009.379485] ? __alloc_skb+0xb8/0x1e0 [1009.379493] ? __alloc_skb+0xb8/0x1e0 [1009.379502] ? entry_SYSCALL_64_after_hwframe+0x46/0xb0 [1009.379509] ? unwind_get_return_address+0x2a/0x40 [1009.379517] ? write_profile+0xc0/0xc0 [1009.379524] ? avc_lookup+0x8f/0xc0 [1009.379532] ? __rcu_read_unlock+0x43/0x60 Fixes: 958bee14d071 ("netfilter: nf_tables: use new transaction infrastructure to handle sets") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3c9292d3f826..b32838974ff1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3409,7 +3409,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nf_tables_rule_release(&ctx, rule); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops) { From d53c295c1f43b7460d28ba0f0f98a602084fdcb6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:52 +0200 Subject: [PATCH 315/509] netfilter: nf_tables: fix chain binding transaction logic [ Upstream commit 4bedf9eee016286c835e3d8fa981ddece5338795 ] Add bound flag to rule and chain transactions as in 6a0a8d10a366 ("netfilter: nf_tables: use-after-free in failing rule with bound set") to skip them in case that the chain is already bound from the abort path. This patch fixes an imbalance in the chain use refcnt that triggers a WARN_ON on the table and chain destroy path. This patch also disallows nested chain bindings, which is not supported from userspace. The logic to deal with chain binding in nft_data_hold() and nft_data_release() is not correct. The NFT_TRANS_PREPARE state needs a special handling in case a chain is bound but next expressions in the same rule fail to initialize as described by 1240eb93f061 ("netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE"). The chain is left bound if rule construction fails, so the objects stored in this chain (and the chain itself) are released by the transaction records from the abort path, follow up patch ("netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain") completes this error handling. When deleting an existing rule, chain bound flag is set off so the rule expression .destroy path releases the objects. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 21 +++++++- net/netfilter/nf_tables_api.c | 86 +++++++++++++++++++----------- net/netfilter/nft_immediate.c | 87 +++++++++++++++++++++++++++---- 3 files changed, 153 insertions(+), 41 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 5e184c20bc55..28b3d4fbebae 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -907,7 +907,10 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) return (void *)&rule->data[rule->dlen]; } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule); +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule); +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase); +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule); static inline void nft_set_elem_update_expr(const struct nft_set_ext *ext, struct nft_regs *regs, @@ -966,6 +969,7 @@ struct nft_chain { }; int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, @@ -1002,11 +1006,17 @@ int nft_chain_validate_dependency(const struct nft_chain *chain, int nft_chain_validate_hooks(const struct nft_chain *chain, unsigned int hook_flags); +static inline bool nft_chain_binding(const struct nft_chain *chain) +{ + return chain->flags & NFT_CHAIN_BINDING; +} + static inline bool nft_chain_is_bound(struct nft_chain *chain) { return (chain->flags & NFT_CHAIN_BINDING) && chain->bound; } +int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); void nf_tables_chain_destroy(struct nft_ctx *ctx); @@ -1431,6 +1441,7 @@ struct nft_trans_rule { struct nft_rule *rule; struct nft_flow_rule *flow; u32 rule_id; + bool bound; }; #define nft_trans_rule(trans) \ @@ -1439,6 +1450,8 @@ struct nft_trans_rule { (((struct nft_trans_rule *)trans->data)->flow) #define nft_trans_rule_id(trans) \ (((struct nft_trans_rule *)trans->data)->rule_id) +#define nft_trans_rule_bound(trans) \ + (((struct nft_trans_rule *)trans->data)->bound) struct nft_trans_set { struct nft_set *set; @@ -1454,13 +1467,17 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { + struct nft_chain *chain; bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool bound; u32 chain_id; }; +#define nft_trans_chain(trans) \ + (((struct nft_trans_chain *)trans->data)->chain) #define nft_trans_chain_update(trans) \ (((struct nft_trans_chain *)trans->data)->update) #define nft_trans_chain_name(trans) \ @@ -1469,6 +1486,8 @@ struct nft_trans_chain { (((struct nft_trans_chain *)trans->data)->stats) #define nft_trans_chain_policy(trans) \ (((struct nft_trans_chain *)trans->data)->policy) +#define nft_trans_chain_bound(trans) \ + (((struct nft_trans_chain *)trans->data)->bound) #define nft_trans_chain_id(trans) \ (((struct nft_trans_chain *)trans->data)->chain_id) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b32838974ff1..10ec55f3f80a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -197,6 +197,48 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) } } +static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + struct nftables_pernet *nft_net; + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_chain_binding(chain)) + return; + + nft_net = net_generic(net, nf_tables_net_id); + list_for_each_entry_reverse(trans, &nft_net->commit_list, list) { + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + if (nft_trans_chain(trans) == chain) + nft_trans_chain_bound(trans) = true; + break; + case NFT_MSG_NEWRULE: + if (trans->ctx.chain == chain) + nft_trans_rule_bound(trans) = true; + break; + } + } +} + +int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + if (!nft_chain_binding(chain)) + return 0; + + if (nft_chain_binding(ctx->chain)) + return -EOPNOTSUPP; + + if (chain->bound) + return -EBUSY; + + chain->bound = true; + chain->use++; + nft_chain_trans_bind(ctx, chain); + + return 0; +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -328,8 +370,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - + nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); + return trans; } @@ -347,8 +390,7 @@ static int nft_delchain(struct nft_ctx *ctx) return 0; } -static void nft_rule_expr_activate(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr; @@ -361,9 +403,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx, } } -static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule, - enum nft_trans_phase phase) +void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr; @@ -2017,7 +2058,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, return 0; } -static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) +int nft_chain_add(struct nft_table *table, struct nft_chain *chain) { int err; @@ -3118,8 +3159,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, return err; } -static void nf_tables_rule_destroy(const struct nft_ctx *ctx, - struct nft_rule *rule) +void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) { struct nft_expr *expr, *next; @@ -3136,7 +3176,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } -void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) +static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); @@ -5547,7 +5587,6 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { struct nft_chain *chain; - struct nft_rule *rule; if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -5555,15 +5594,6 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) case NFT_GOTO: chain = data->verdict.chain; chain->use++; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use++; - list_for_each_entry(rule, &chain->rules, list) - chain->use++; - - nft_chain_add(chain->table, chain); break; } } @@ -8254,7 +8284,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) kfree(nft_trans_chain_name(trans)); nft_trans_destroy(trans); } else { - if (nft_chain_is_bound(trans->ctx.chain)) { + if (nft_trans_chain_bound(trans)) { nft_trans_destroy(trans); break; } @@ -8271,6 +8301,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: + if (nft_trans_rule_bound(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); nft_rule_expr_deactivate(&trans->ctx, @@ -8796,22 +8830,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { struct nft_chain *chain; - struct nft_rule *rule; switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: chain = data->verdict.chain; chain->use--; - - if (!nft_chain_is_bound(chain)) - break; - - chain->table->use--; - list_for_each_entry(rule, &chain->rules, list) - chain->use--; - - nft_chain_del(chain); break; } } diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index fcdbc5ed3f36..9d4248898ce4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, switch (priv->data.verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nft_chain_is_bound(chain)) { - err = -EBUSY; - goto err1; - } - chain->bound = true; + err = nf_tables_bind_chain(ctx, chain); + if (err < 0) + return err; break; default: break; @@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_activate(&chain_ctx, rule); + + nft_clear(ctx->net, chain); + break; + default: + break; + } + } return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); } @@ -107,6 +130,40 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); + const struct nft_data *data = &priv->data; + struct nft_ctx chain_ctx; + struct nft_chain *chain; + struct nft_rule *rule; + + if (priv->dreg == NFT_REG_VERDICT) { + switch (data->verdict.code) { + case NFT_JUMP: + case NFT_GOTO: + chain = data->verdict.chain; + if (!nft_chain_binding(chain)) + break; + + chain_ctx = *ctx; + chain_ctx.chain = chain; + + list_for_each_entry(rule, &chain->rules, list) + nft_rule_expr_deactivate(&chain_ctx, rule, phase); + + switch (phase) { + case NFT_TRANS_PREPARE: + nft_deactivate_next(ctx->net, chain); + break; + default: + nft_chain_del(chain); + chain->bound = false; + chain->table->use--; + break; + } + break; + default: + break; + } + } if (phase == NFT_TRANS_COMMIT) return; @@ -131,15 +188,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, case NFT_GOTO: chain = data->verdict.chain; - if (!nft_chain_is_bound(chain)) + if (!nft_chain_binding(chain)) break; + /* Rule construction failed, but chain is already bound: + * let the transaction records release this chain and its rules. + */ + if (chain->bound) { + chain->use--; + break; + } + + /* Rule has been deleted, release chain and its rules. */ chain_ctx = *ctx; chain_ctx.chain = chain; - list_for_each_entry_safe(rule, n, &chain->rules, list) - nf_tables_rule_release(&chain_ctx, rule); - + chain->use--; + list_for_each_entry_safe(rule, n, &chain->rules, list) { + chain->use--; + list_del(&rule->list); + nf_tables_rule_destroy(&chain_ctx, rule); + } nf_tables_chain_destroy(&chain_ctx); break; default: From 34d09fe49f59d3d7cb79ace1785b138b3b8d9e52 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:53 +0200 Subject: [PATCH 316/509] netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain [ Upstream commit 26b5a5712eb85e253724e56a54c17f8519bd8e4e ] Add a new state to deal with rule expressions deactivation from the newrule error path, otherwise the anonymous set remains in the list in inactive state for the next generation. Mark the set/chain transaction as unbound so the abort path releases this object, set it as inactive in the next generation so it is not reachable anymore from this transaction and reference counter is dropped. Fixes: 1240eb93f061 ("netfilter: nf_tables: incorrect error path handling with NFT_MSG_NEWRULE") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 45 ++++++++++++++++++++++++++----- net/netfilter/nft_immediate.c | 3 +++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 28b3d4fbebae..03886052491e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -777,6 +777,7 @@ struct nft_expr_type { enum nft_trans_phase { NFT_TRANS_PREPARE, + NFT_TRANS_PREPARE_ERROR, NFT_TRANS_ABORT, NFT_TRANS_COMMIT, NFT_TRANS_RELEASE @@ -970,6 +971,7 @@ struct nft_chain { int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 10ec55f3f80a..c646537efaf5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -173,7 +173,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set, + bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -187,17 +188,28 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) switch (trans->msg_type) { case NFT_MSG_NEWSET: if (nft_trans_set(trans) == set) - nft_trans_set_bound(trans) = true; + nft_trans_set_bound(trans) = bind; break; case NFT_MSG_NEWSETELEM: if (nft_trans_elem_set(trans) == set) - nft_trans_elem_set_bound(trans) = true; + nft_trans_elem_set_bound(trans) = bind; break; } } } -static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *chain) +static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, true); +} + +static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set) +{ + return __nft_set_trans_bind(ctx, set, false); +} + +static void __nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain, bool bind) { struct nftables_pernet *nft_net; struct net *net = ctx->net; @@ -211,16 +223,22 @@ static void nft_chain_trans_bind(const struct nft_ctx *ctx, struct nft_chain *ch switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: if (nft_trans_chain(trans) == chain) - nft_trans_chain_bound(trans) = true; + nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: if (trans->ctx.chain == chain) - nft_trans_rule_bound(trans) = true; + nft_trans_rule_bound(trans) = bind; break; } } } +static void nft_chain_trans_bind(const struct nft_ctx *ctx, + struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, true); +} + int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) { if (!nft_chain_binding(chain)) @@ -239,6 +257,11 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) return 0; } +void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain) +{ + __nft_chain_trans_bind(ctx, chain, false); +} + static int nft_netdev_register_hooks(struct net *net, struct list_head *hook_list) { @@ -3449,7 +3472,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE); + nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR); nf_tables_rule_destroy(&ctx, rule); err1: for (i = 0; i < n; i++) { @@ -4585,6 +4608,13 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nft_set_trans_unbind(ctx, set); + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + + set->use--; + break; case NFT_TRANS_PREPARE: if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); @@ -6525,6 +6555,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx, enum nft_trans_phase phase) { switch (phase) { + case NFT_TRANS_PREPARE_ERROR: case NFT_TRANS_PREPARE: case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 9d4248898ce4..6b0efab4fad0 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -150,6 +150,9 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx, nft_rule_expr_deactivate(&chain_ctx, rule, phase); switch (phase) { + case NFT_TRANS_PREPARE_ERROR: + nf_tables_unbind_chain(ctx, chain); + fallthrough; case NFT_TRANS_PREPARE: nft_deactivate_next(ctx->net, chain); break; From 0205dd16edebee8074ac53cc67de98d959a24b60 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:54 +0200 Subject: [PATCH 317/509] netfilter: nf_tables: reject unbound anonymous set before commit phase [ Upstream commit 938154b93be8cd611ddfd7bafc1849f3c4355201 ] Add a new list to track set transaction and to check for unbound anonymous sets before entering the commit phase. Bail out at the end of the transaction handling if an anonymous set remains unbound. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 3 +++ net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 03886052491e..716a974a7263 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1426,6 +1426,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1433,6 +1434,7 @@ static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; + struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; @@ -1559,6 +1561,7 @@ __be64 nf_jiffies64_to_msecs(u64 input); struct nftables_pernet { struct list_head tables; struct list_head commit_list; + struct list_head binding_list; struct list_head module_list; struct list_head notify_list; struct mutex commit_mutex; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c646537efaf5..ff65de8119c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -155,6 +155,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, return NULL; INIT_LIST_HEAD(&trans->list); + INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -167,9 +168,15 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } -static void nft_trans_destroy(struct nft_trans *trans) +static void nft_trans_list_del(struct nft_trans *trans) { list_del(&trans->list); + list_del(&trans->binding_list); +} + +static void nft_trans_destroy(struct nft_trans *trans) +{ + nft_trans_list_del(trans); kfree(trans); } @@ -347,6 +354,14 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr struct nftables_pernet *nft_net; nft_net = net_generic(net, nf_tables_net_id); + + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; + } + list_add_tail(&trans->list, &nft_net->commit_list); } @@ -7717,7 +7732,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w) synchronize_rcu(); list_for_each_entry_safe(trans, next, &head, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nft_commit_release(trans); } } @@ -8019,6 +8034,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } + list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !nft_trans_set_bound(trans)) { + pr_warn_once("nftables ruleset with unbound set\n"); + return -EINVAL; + } + break; + } + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; @@ -8421,7 +8448,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { - list_del(&trans->list); + nft_trans_list_del(trans); nf_tables_abort_release(trans); } @@ -9120,6 +9147,7 @@ static int __net_init nf_tables_init_net(struct net *net) INIT_LIST_HEAD(&nft_net->tables); INIT_LIST_HEAD(&nft_net->commit_list); + INIT_LIST_HEAD(&nft_net->binding_list); INIT_LIST_HEAD(&nft_net->module_list); INIT_LIST_HEAD(&nft_net->notify_list); mutex_init(&nft_net->commit_mutex); From 237f37f7b9f075c34bf0468c390cdd7e2725d8b0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:55 +0200 Subject: [PATCH 318/509] netfilter: nf_tables: reject unbound chain set before commit phase [ Upstream commit 62e1e94b246e685d89c3163aaef4b160e42ceb02 ] Use binding list to track set transaction and to check for unbound chains before entering the commit phase. Bail out if chain binding remain unused before entering the commit step. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ff65de8119c3..ef965056bcca 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -360,6 +360,11 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr if (nft_set_is_anonymous(nft_trans_set(trans))) list_add_tail(&trans->binding_list, &nft_net->binding_list); break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans))) + list_add_tail(&trans->binding_list, &nft_net->binding_list); + break; } list_add_tail(&trans->list, &nft_net->commit_list); @@ -8043,6 +8048,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + case NFT_MSG_NEWCHAIN: + if (!nft_trans_chain_update(trans) && + nft_chain_binding(nft_trans_chain(trans)) && + !nft_trans_chain_bound(trans)) { + pr_warn_once("nftables ruleset with unbound chain\n"); + return -EINVAL; + } + break; } } From 21cf0d66ef889f8e1f7181cb8960554142396912 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:56 +0200 Subject: [PATCH 319/509] netfilter: nftables: rename set element data activation/deactivation functions [ Upstream commit f8bb7889af58d8e74d2d61c76b1418230f1610fa ] Rename: - nft_set_elem_activate() to nft_set_elem_data_activate(). - nft_set_elem_deactivate() to nft_set_elem_data_deactivate(). To prepare for updates in the set element infrastructure to add support for the special catch-all element. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ef965056bcca..5af9290af34f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5270,8 +5270,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_set_elem_deactivate() already deals with - * the refcounting from the preparation phase. +/* Only called from commit path, nft_setelem_data_deactivate() already deals + * with the refcounting from the preparation phase. */ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, const struct nft_set *set, void *elem) @@ -5649,9 +5649,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) } } -static void nft_set_elem_activate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5661,9 +5661,9 @@ static void nft_set_elem_activate(const struct net *net, (*nft_set_ext_obj(ext))->use++; } -static void nft_set_elem_deactivate(const struct net *net, - const struct nft_set *set, - struct nft_set_elem *elem) +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -5740,7 +5740,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, kfree(elem.priv); elem.priv = priv; - nft_set_elem_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, &elem); nft_trans_elem(trans) = elem; nft_trans_commit_list_add_tail(ctx->net, trans); @@ -5774,7 +5774,7 @@ static int nft_flush_set(const struct nft_ctx *ctx, } set->ndeact++; - nft_set_elem_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem); nft_trans_elem_set(trans) = set; nft_trans_elem(trans) = *elem; nft_trans_commit_list_add_tail(ctx->net, trans); @@ -8413,7 +8413,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_set_elem_activate(net, te->set, &te->elem); + nft_setelem_data_activate(net, te->set, &te->elem); te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; From a136b7942ad2a50de708f76ea299ccb45ac7a7f9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:57 +0200 Subject: [PATCH 320/509] netfilter: nf_tables: drop map element references from preparation phase [ Upstream commit 628bd3e49cba1c066228e23d71a852c23e26da73 ] set .destroy callback releases the references to other objects in maps. This is very late and it results in spurious EBUSY errors. Drop refcount from the preparation phase instead, update set backend not to drop reference counter from set .destroy path. Exceptions: NFT_TRANS_PREPARE_ERROR does not require to drop the reference counter because the transaction abort path releases the map references for each element since the set is unbound. The abort path also deals with releasing reference counter for new elements added to unbound sets. Fixes: 591054469b3e ("netfilter: nf_tables: revisit chain/object refcounting from elements") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- include/net/netfilter/nf_tables.h | 5 +- net/netfilter/nf_tables_api.c | 89 +++++++++++++++++++++++++++---- net/netfilter/nft_set_bitmap.c | 5 +- net/netfilter/nft_set_hash.c | 23 ++++++-- net/netfilter/nft_set_pipapo.c | 14 +++-- net/netfilter/nft_set_rbtree.c | 5 +- 6 files changed, 117 insertions(+), 24 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 716a974a7263..fb3c5f690750 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -382,7 +382,8 @@ struct nft_set_ops { int (*init)(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const nla[]); - void (*destroy)(const struct nft_set *set); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_set *set); void (*gc_init)(const struct nft_set *set); unsigned int elemsize; @@ -686,6 +687,8 @@ void *nft_set_elem_init(const struct nft_set *set, u64 timeout, u64 expiration, gfp_t gfp); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem); /** * struct nft_set_gc_batch_head - nf_tables set garbage collection batch diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5af9290af34f..6f6e8c567892 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -557,6 +557,31 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, return 0; } +static void nft_setelem_data_deactivate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_deactivate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_deactivate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_deactivate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -565,6 +590,9 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); ctx->table->use--; @@ -4474,7 +4502,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (set->expr) nft_expr_destroy(&ctx, set->expr); - ops->destroy(set); + ops->destroy(&ctx, set); err_set_init: kfree(set->name); err_set_name: @@ -4490,7 +4518,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) if (set->expr) nft_expr_destroy(ctx, set->expr); - set->ops->destroy(set); + set->ops->destroy(ctx, set); kfree(set->name); kvfree(set); } @@ -4614,10 +4642,39 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +static void nft_setelem_data_activate(const struct net *net, + const struct nft_set *set, + struct nft_set_elem *elem); + +static int nft_mapelem_activate(const struct nft_ctx *ctx, + struct nft_set *set, + const struct nft_set_iter *iter, + struct nft_set_elem *elem) +{ + nft_setelem_data_activate(ctx->net, set, elem); + + return 0; +} + +static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set) +{ + struct nft_set_iter iter = { + .genmask = nft_genmask_next(ctx->net), + .fn = nft_mapelem_activate, + }; + + set->ops->walk(ctx, set, &iter); + WARN_ON_ONCE(iter.err); +} + void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (nft_set_is_anonymous(set)) + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(ctx, set); + nft_clear(ctx->net, set); + } set->use++; } @@ -4636,13 +4693,20 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, set->use--; break; case NFT_TRANS_PREPARE: - if (nft_set_is_anonymous(set)) - nft_deactivate_next(ctx->net, set); + if (nft_set_is_anonymous(set)) { + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + nft_deactivate_next(ctx->net, set); + } set->use--; return; case NFT_TRANS_ABORT: case NFT_TRANS_RELEASE: + if (nft_set_is_anonymous(set) && + set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(ctx, set); + set->use--; fallthrough; default: @@ -5249,6 +5313,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } } +/* Drop references and destroy. Called from gc, dynset and abort path. */ void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr) { @@ -5270,11 +5335,11 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); -/* Only called from commit path, nft_setelem_data_deactivate() already deals - * with the refcounting from the preparation phase. +/* Destroy element. References have been already dropped in the preparation + * path via nft_setelem_data_deactivate(). */ -static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) +void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, void *elem) { struct nft_set_ext *ext = nft_set_elem_ext(set, elem); @@ -8399,6 +8464,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DELSET: trans->ctx.table->use++; nft_clear(trans->ctx.net, nft_trans_set(trans)); + if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -9128,6 +9196,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) list_for_each_entry_safe(set, ns, &table->sets, list) { list_del(&set->list); table->use--; + if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) + nft_map_deactivate(&ctx, set); + nft_set_destroy(&ctx, set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 2a81ea421819..3c63f8acebd8 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -270,13 +270,14 @@ static int nft_bitmap_init(const struct nft_set *set, return 0; } -static void nft_bitmap_destroy(const struct nft_set *set) +static void nft_bitmap_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nft_set_elem_destroy(set, be, true); + nf_tables_set_elem_destroy(ctx, set, be); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index a5cfb321ae23..51d3e6f0934a 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -380,19 +380,31 @@ static int nft_rhash_init(const struct nft_set *set, return 0; } +struct nft_rhash_ctx { + const struct nft_ctx ctx; + const struct nft_set *set; +}; + static void nft_rhash_elem_destroy(void *ptr, void *arg) { - nft_set_elem_destroy(arg, ptr, true); + struct nft_rhash_ctx *rhash_ctx = arg; + + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); } -static void nft_rhash_destroy(const struct nft_set *set) +static void nft_rhash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_ctx rhash_ctx = { + .ctx = *ctx, + .set = set, + }; cancel_delayed_work_sync(&priv->gc_work); rcu_barrier(); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, - (void *)set); + (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ @@ -621,7 +633,8 @@ static int nft_hash_init(const struct nft_set *set, return 0; } -static void nft_hash_destroy(const struct nft_set *set) +static void nft_hash_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; @@ -631,7 +644,7 @@ static void nft_hash_destroy(const struct nft_set *set) for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nft_set_elem_destroy(set, he, true); + nf_tables_set_elem_destroy(ctx, set, he); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index eb5934eb3adf..7c759e9b4d84 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -2127,10 +2127,12 @@ static int nft_pipapo_init(const struct nft_set *set, /** * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array + * @ctx: context * @set: nftables API set representation * @m: matching data pointing to key mapping array */ -static void nft_set_pipapo_match_destroy(const struct nft_set *set, +static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, + const struct nft_set *set, struct nft_pipapo_match *m) { struct nft_pipapo_field *f; @@ -2147,15 +2149,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set, e = f->mt[r].e; - nft_set_elem_destroy(set, e, true); + nf_tables_set_elem_destroy(ctx, set, e); } } /** * nft_pipapo_destroy() - Free private data for set and all committed elements + * @ctx: context * @set: nftables API set representation */ -static void nft_pipapo_destroy(const struct nft_set *set) +static void nft_pipapo_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m; @@ -2165,7 +2169,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) if (m) { rcu_barrier(); - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(m->scratch_aligned); @@ -2182,7 +2186,7 @@ static void nft_pipapo_destroy(const struct nft_set *set) m = priv->clone; if (priv->dirty) - nft_set_pipapo_match_destroy(set, m); + nft_set_pipapo_match_destroy(ctx, set, m); #ifdef NFT_PIPAPO_ALIGN free_percpu(priv->clone->scratch_aligned); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 1ffb24f4c74c..172b994790a0 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -657,7 +657,8 @@ static int nft_rbtree_init(const struct nft_set *set, return 0; } -static void nft_rbtree_destroy(const struct nft_set *set) +static void nft_rbtree_destroy(const struct nft_ctx *ctx, + const struct nft_set *set) { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; @@ -668,7 +669,7 @@ static void nft_rbtree_destroy(const struct nft_set *set) while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nft_set_elem_destroy(set, rbe, true); + nf_tables_set_elem_destroy(ctx, set, rbe); } } From a07e415be383d0a91d4a6e6470f34663a5ef6600 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Jul 2023 10:48:58 +0200 Subject: [PATCH 321/509] netfilter: nf_tables: unbind non-anonymous set if rule construction fails [ Upstream commit 3e70489721b6c870252c9082c496703677240f53 ] Otherwise a dangling reference to a rule object that is gone remains in the set binding list. Fixes: 26b5a5712eb8 ("netfilter: nf_tables: add NFT_TRANS_PREPARE_ERROR to deal with bound set/chain") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6f6e8c567892..25b834868eac 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4689,6 +4689,8 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, nft_set_trans_unbind(ctx, set); if (nft_set_is_anonymous(set)) nft_deactivate_next(ctx->net, set); + else + list_del_rcu(&binding->list); set->use--; break; From be6478f5cce6fba311282a8e948a4d9852f38edd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Jul 2023 10:48:59 +0200 Subject: [PATCH 322/509] netfilter: nf_tables: fix scheduling-while-atomic splat [ Upstream commit 2024439bd5ceb145eeeb428b2a59e9b905153ac3 ] nf_tables_check_loops() can be called from rhashtable list walk so cond_resched() cannot be used here. Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 25b834868eac..7e037daaa613 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8684,13 +8684,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, break; } } - - cond_resched(); } list_for_each_entry(set, &ctx->table->sets, list) { - cond_resched(); - if (!nft_is_active_next(ctx->net, set)) continue; if (!(set->flags & NFT_SET_MAP) || From 8289d422f5e484efe4a565fe18e862ecd621c175 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Mon, 3 Jul 2023 16:52:16 +0200 Subject: [PATCH 323/509] netfilter: conntrack: Avoid nf_ct_helper_hash uses after free commit 6eef7a2b933885a17679eb8ed0796ddf0ee5309b upstream. If nf_conntrack_init_start() fails (for example due to a register_nf_conntrack_bpf() failure), the nf_conntrack_helper_fini() clean-up path frees the nf_ct_helper_hash map. When built with NF_CONNTRACK=y, further netfilter modules (e.g: netfilter_conntrack_ftp) can still be loaded and call nf_conntrack_helpers_register(), independently of whether nf_conntrack initialized correctly. This accesses the nf_ct_helper_hash dangling pointer and causes a uaf, possibly leading to random memory corruption. This patch guards nf_conntrack_helper_register() from accessing a freed or uninitialized nf_ct_helper_hash pointer and fixes possible uses-after-free when loading a conntrack module. Cc: stable@vger.kernel.org Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure") Signed-off-by: Florent Revest Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 118f415928ae..32cc91f5ba99 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -404,6 +404,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); + if (!nf_ct_helper_hash) + return -ENOENT; + if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; @@ -587,4 +590,5 @@ void nf_conntrack_helper_fini(void) { nf_ct_extend_unregister(&helper_extend); kvfree(nf_ct_helper_hash); + nf_ct_helper_hash = NULL; } From 4ae2e501331aaa506eaf760339bb2f43e5769395 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 5 Jul 2023 09:12:55 -0300 Subject: [PATCH 324/509] netfilter: nf_tables: do not ignore genmask when looking up chain by id commit 515ad530795c118f012539ed76d02bacfd426d89 upstream. When adding a rule to a chain referring to its ID, if that chain had been deleted on the same batch, the rule might end up referring to a deleted chain. This will lead to a WARNING like following: [ 33.098431] ------------[ cut here ]------------ [ 33.098678] WARNING: CPU: 5 PID: 69 at net/netfilter/nf_tables_api.c:2037 nf_tables_chain_destroy+0x23d/0x260 [ 33.099217] Modules linked in: [ 33.099388] CPU: 5 PID: 69 Comm: kworker/5:1 Not tainted 6.4.0+ #409 [ 33.099726] Workqueue: events nf_tables_trans_destroy_work [ 33.100018] RIP: 0010:nf_tables_chain_destroy+0x23d/0x260 [ 33.100306] Code: 8b 7c 24 68 e8 64 9c ed fe 4c 89 e7 e8 5c 9c ed fe 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 89 c6 89 c7 c3 cc cc cc cc <0f> 0b 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d 31 c0 89 c6 89 c7 [ 33.101271] RSP: 0018:ffffc900004ffc48 EFLAGS: 00010202 [ 33.101546] RAX: 0000000000000001 RBX: ffff888006fc0a28 RCX: 0000000000000000 [ 33.101920] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 33.102649] RBP: ffffc900004ffc78 R08: 0000000000000000 R09: 0000000000000000 [ 33.103018] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8880135ef500 [ 33.103385] R13: 0000000000000000 R14: dead000000000122 R15: ffff888006fc0a10 [ 33.103762] FS: 0000000000000000(0000) GS:ffff888024c80000(0000) knlGS:0000000000000000 [ 33.104184] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 33.104493] CR2: 00007fe863b56a50 CR3: 00000000124b0001 CR4: 0000000000770ee0 [ 33.104872] PKRU: 55555554 [ 33.104999] Call Trace: [ 33.105113] [ 33.105214] ? show_regs+0x72/0x90 [ 33.105371] ? __warn+0xa5/0x210 [ 33.105520] ? nf_tables_chain_destroy+0x23d/0x260 [ 33.105732] ? report_bug+0x1f2/0x200 [ 33.105902] ? handle_bug+0x46/0x90 [ 33.106546] ? exc_invalid_op+0x19/0x50 [ 33.106762] ? asm_exc_invalid_op+0x1b/0x20 [ 33.106995] ? nf_tables_chain_destroy+0x23d/0x260 [ 33.107249] ? nf_tables_chain_destroy+0x30/0x260 [ 33.107506] nf_tables_trans_destroy_work+0x669/0x680 [ 33.107782] ? mark_held_locks+0x28/0xa0 [ 33.107996] ? __pfx_nf_tables_trans_destroy_work+0x10/0x10 [ 33.108294] ? _raw_spin_unlock_irq+0x28/0x70 [ 33.108538] process_one_work+0x68c/0xb70 [ 33.108755] ? lock_acquire+0x17f/0x420 [ 33.108977] ? __pfx_process_one_work+0x10/0x10 [ 33.109218] ? do_raw_spin_lock+0x128/0x1d0 [ 33.109435] ? _raw_spin_lock_irq+0x71/0x80 [ 33.109634] worker_thread+0x2bd/0x700 [ 33.109817] ? __pfx_worker_thread+0x10/0x10 [ 33.110254] kthread+0x18b/0x1d0 [ 33.110410] ? __pfx_kthread+0x10/0x10 [ 33.110581] ret_from_fork+0x29/0x50 [ 33.110757] [ 33.110866] irq event stamp: 1651 [ 33.111017] hardirqs last enabled at (1659): [] __up_console_sem+0x79/0xa0 [ 33.111379] hardirqs last disabled at (1666): [] __up_console_sem+0x5e/0xa0 [ 33.111740] softirqs last enabled at (1616): [] __irq_exit_rcu+0x9e/0xe0 [ 33.112094] softirqs last disabled at (1367): [] __irq_exit_rcu+0x9e/0xe0 [ 33.112453] ---[ end trace 0000000000000000 ]--- This is due to the nft_chain_lookup_byid ignoring the genmask. After this change, adding the new rule will fail as it will not find the chain. Fixes: 837830a4b439 ("netfilter: nf_tables: add NFTA_RULE_CHAIN_ID attribute") Cc: stable@vger.kernel.org Reported-by: Mingi Cho of Theori working with ZDI Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_tables_api.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e037daaa613..d56f5d7fa545 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2427,7 +2427,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, static struct nft_chain *nft_chain_lookup_byid(const struct net *net, const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, u8 genmask) { struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id); u32 id = ntohl(nla_get_be32(nla)); @@ -2438,7 +2438,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, if (trans->msg_type == NFT_MSG_NEWCHAIN && chain->table == table && - id == nft_trans_chain_id(trans)) + id == nft_trans_chain_id(trans) && + nft_active_genmask(chain, genmask)) return chain; } return ERR_PTR(-ENOENT); @@ -3353,7 +3354,8 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { - chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]); + chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID], + genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); return PTR_ERR(chain); @@ -8937,7 +8939,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, genmask); } else if (tb[NFTA_VERDICT_CHAIN_ID]) { chain = nft_chain_lookup_byid(ctx->net, ctx->table, - tb[NFTA_VERDICT_CHAIN_ID]); + tb[NFTA_VERDICT_CHAIN_ID], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { From ea213922249c7e448d217a0a0441c6f86a8155fd Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Wed, 5 Jul 2023 18:05:35 -0300 Subject: [PATCH 325/509] netfilter: nf_tables: prevent OOB access in nft_byteorder_eval commit caf3ef7468f7534771b5c44cd8dbd6f7f87c2cbd upstream. When evaluating byteorder expressions with size 2, a union with 32-bit and 16-bit members is used. Since the 16-bit members are aligned to 32-bit, the array accesses will be out-of-bounds. It may lead to a stack-out-of-bounds access like the one below: [ 23.095215] ================================================================== [ 23.095625] BUG: KASAN: stack-out-of-bounds in nft_byteorder_eval+0x13c/0x320 [ 23.096020] Read of size 2 at addr ffffc90000007948 by task ping/115 [ 23.096358] [ 23.096456] CPU: 0 PID: 115 Comm: ping Not tainted 6.4.0+ #413 [ 23.096770] Call Trace: [ 23.096910] [ 23.097030] dump_stack_lvl+0x60/0xc0 [ 23.097218] print_report+0xcf/0x630 [ 23.097388] ? nft_byteorder_eval+0x13c/0x320 [ 23.097577] ? kasan_addr_to_slab+0xd/0xc0 [ 23.097760] ? nft_byteorder_eval+0x13c/0x320 [ 23.097949] kasan_report+0xc9/0x110 [ 23.098106] ? nft_byteorder_eval+0x13c/0x320 [ 23.098298] __asan_load2+0x83/0xd0 [ 23.098453] nft_byteorder_eval+0x13c/0x320 [ 23.098659] nft_do_chain+0x1c8/0xc50 [ 23.098852] ? __pfx_nft_do_chain+0x10/0x10 [ 23.099078] ? __kasan_check_read+0x11/0x20 [ 23.099295] ? __pfx___lock_acquire+0x10/0x10 [ 23.099535] ? __pfx___lock_acquire+0x10/0x10 [ 23.099745] ? __kasan_check_read+0x11/0x20 [ 23.099929] nft_do_chain_ipv4+0xfe/0x140 [ 23.100105] ? __pfx_nft_do_chain_ipv4+0x10/0x10 [ 23.100327] ? lock_release+0x204/0x400 [ 23.100515] ? nf_hook.constprop.0+0x340/0x550 [ 23.100779] nf_hook_slow+0x6c/0x100 [ 23.100977] ? __pfx_nft_do_chain_ipv4+0x10/0x10 [ 23.101223] nf_hook.constprop.0+0x334/0x550 [ 23.101443] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 23.101677] ? __pfx_nf_hook.constprop.0+0x10/0x10 [ 23.101882] ? __pfx_ip_rcv_finish+0x10/0x10 [ 23.102071] ? __pfx_ip_local_deliver_finish+0x10/0x10 [ 23.102291] ? rcu_read_lock_held+0x4b/0x70 [ 23.102481] ip_local_deliver+0xbb/0x110 [ 23.102665] ? __pfx_ip_rcv+0x10/0x10 [ 23.102839] ip_rcv+0x199/0x2a0 [ 23.102980] ? __pfx_ip_rcv+0x10/0x10 [ 23.103140] __netif_receive_skb_one_core+0x13e/0x150 [ 23.103362] ? __pfx___netif_receive_skb_one_core+0x10/0x10 [ 23.103647] ? mark_held_locks+0x48/0xa0 [ 23.103819] ? process_backlog+0x36c/0x380 [ 23.103999] __netif_receive_skb+0x23/0xc0 [ 23.104179] process_backlog+0x91/0x380 [ 23.104350] __napi_poll.constprop.0+0x66/0x360 [ 23.104589] ? net_rx_action+0x1cb/0x610 [ 23.104811] net_rx_action+0x33e/0x610 [ 23.105024] ? _raw_spin_unlock+0x23/0x50 [ 23.105257] ? __pfx_net_rx_action+0x10/0x10 [ 23.105485] ? mark_held_locks+0x48/0xa0 [ 23.105741] __do_softirq+0xfa/0x5ab [ 23.105956] ? __dev_queue_xmit+0x765/0x1c00 [ 23.106193] do_softirq.part.0+0x49/0xc0 [ 23.106423] [ 23.106547] [ 23.106670] __local_bh_enable_ip+0xf5/0x120 [ 23.106903] __dev_queue_xmit+0x789/0x1c00 [ 23.107131] ? __pfx___dev_queue_xmit+0x10/0x10 [ 23.107381] ? find_held_lock+0x8e/0xb0 [ 23.107585] ? lock_release+0x204/0x400 [ 23.107798] ? neigh_resolve_output+0x185/0x350 [ 23.108049] ? mark_held_locks+0x48/0xa0 [ 23.108265] ? neigh_resolve_output+0x185/0x350 [ 23.108514] neigh_resolve_output+0x246/0x350 [ 23.108753] ? neigh_resolve_output+0x246/0x350 [ 23.109003] ip_finish_output2+0x3c3/0x10b0 [ 23.109250] ? __pfx_ip_finish_output2+0x10/0x10 [ 23.109510] ? __pfx_nf_hook+0x10/0x10 [ 23.109732] __ip_finish_output+0x217/0x390 [ 23.109978] ip_finish_output+0x2f/0x130 [ 23.110207] ip_output+0xc9/0x170 [ 23.110404] ip_push_pending_frames+0x1a0/0x240 [ 23.110652] raw_sendmsg+0x102e/0x19e0 [ 23.110871] ? __pfx_raw_sendmsg+0x10/0x10 [ 23.111093] ? lock_release+0x204/0x400 [ 23.111304] ? __mod_lruvec_page_state+0x148/0x330 [ 23.111567] ? find_held_lock+0x8e/0xb0 [ 23.111777] ? find_held_lock+0x8e/0xb0 [ 23.111993] ? __rcu_read_unlock+0x7c/0x2f0 [ 23.112225] ? aa_sk_perm+0x18a/0x550 [ 23.112431] ? filemap_map_pages+0x4f1/0x900 [ 23.112665] ? __pfx_aa_sk_perm+0x10/0x10 [ 23.112880] ? find_held_lock+0x8e/0xb0 [ 23.113098] inet_sendmsg+0xa0/0xb0 [ 23.113297] ? inet_sendmsg+0xa0/0xb0 [ 23.113500] ? __pfx_inet_sendmsg+0x10/0x10 [ 23.113727] sock_sendmsg+0xf4/0x100 [ 23.113924] ? move_addr_to_kernel.part.0+0x4f/0xa0 [ 23.114190] __sys_sendto+0x1d4/0x290 [ 23.114391] ? __pfx___sys_sendto+0x10/0x10 [ 23.114621] ? __pfx_mark_lock.part.0+0x10/0x10 [ 23.114869] ? lock_release+0x204/0x400 [ 23.115076] ? find_held_lock+0x8e/0xb0 [ 23.115287] ? rcu_is_watching+0x23/0x60 [ 23.115503] ? __rseq_handle_notify_resume+0x6e2/0x860 [ 23.115778] ? __kasan_check_write+0x14/0x30 [ 23.116008] ? blkcg_maybe_throttle_current+0x8d/0x770 [ 23.116285] ? mark_held_locks+0x28/0xa0 [ 23.116503] ? do_syscall_64+0x37/0x90 [ 23.116713] __x64_sys_sendto+0x7f/0xb0 [ 23.116924] do_syscall_64+0x59/0x90 [ 23.117123] ? irqentry_exit_to_user_mode+0x25/0x30 [ 23.117387] ? irqentry_exit+0x77/0xb0 [ 23.117593] ? exc_page_fault+0x92/0x140 [ 23.117806] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 23.118081] RIP: 0033:0x7f744aee2bba [ 23.118282] Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 [ 23.119237] RSP: 002b:00007ffd04a7c9f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 23.119644] RAX: ffffffffffffffda RBX: 00007ffd04a7e0a0 RCX: 00007f744aee2bba [ 23.120023] RDX: 0000000000000040 RSI: 000056488e9e6300 RDI: 0000000000000003 [ 23.120413] RBP: 000056488e9e6300 R08: 00007ffd04a80320 R09: 0000000000000010 [ 23.120809] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 [ 23.121219] R13: 00007ffd04a7dc38 R14: 00007ffd04a7ca00 R15: 00007ffd04a7e0a0 [ 23.121617] [ 23.121749] [ 23.121845] The buggy address belongs to the virtual mapping at [ 23.121845] [ffffc90000000000, ffffc90000009000) created by: [ 23.121845] irq_init_percpu_irqstack+0x1cf/0x270 [ 23.122707] [ 23.122803] The buggy address belongs to the physical page: [ 23.123104] page:0000000072ac19f0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24a09 [ 23.123609] flags: 0xfffffc0001000(reserved|node=0|zone=1|lastcpupid=0x1fffff) [ 23.123998] page_type: 0xffffffff() [ 23.124194] raw: 000fffffc0001000 ffffea0000928248 ffffea0000928248 0000000000000000 [ 23.124610] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 23.125023] page dumped because: kasan: bad access detected [ 23.125326] [ 23.125421] Memory state around the buggy address: [ 23.125682] ffffc90000007800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 23.126072] ffffc90000007880: 00 00 00 00 00 f1 f1 f1 f1 f1 f1 00 00 f2 f2 00 [ 23.126455] >ffffc90000007900: 00 00 00 00 00 00 00 00 00 f2 f2 f2 f2 00 00 00 [ 23.126840] ^ [ 23.127138] ffffc90000007980: 00 00 00 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 [ 23.127522] ffffc90000007a00: f3 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 [ 23.127906] ================================================================== [ 23.128324] Disabling lock debugging due to kernel taint Using simple s16 pointers for the 16-bit accesses fixes the problem. For the 32-bit accesses, src and dst can be used directly. Fixes: 96518518cc41 ("netfilter: add nftables") Cc: stable@vger.kernel.org Reported-by: Tanguy DUBROCA (@SidewayRE) from @Synacktiv working with ZDI Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_byteorder.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 9d5947ab8d4e..7b0b8fecb220 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr, const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = ®s->data[priv->sreg]; u32 *dst = ®s->data[priv->dreg]; - union { u32 u32; u16 u16; } *s, *d; + u16 *s16, *d16; unsigned int i; - s = (void *)src; - d = (void *)dst; + s16 = (void *)src; + d16 = (void *)dst; switch (priv->size) { case 8: { @@ -61,11 +61,11 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = ntohl((__force __be32)s[i].u32); + dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) - d[i].u32 = (__force __u32)htonl(s[i].u32); + dst[i] = (__force __u32)htonl(src[i]); break; } break; @@ -73,11 +73,11 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = ntohs((__force __be16)s[i].u16); + d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) - d[i].u16 = (__force __u16)htons(s[i].u16); + d16[i] = (__force __u16)htons(s16[i]); break; } break; From 1b7107040596222982bbc0aa646692b0ef92e5fd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Jul 2023 03:27:04 +0200 Subject: [PATCH 326/509] wireguard: queueing: use saner cpu selection wrapping commit 7387943fa35516f6f8017a3b0e9ce48a3bef9faa upstream. Using `% nr_cpumask_bits` is slow and complicated, and not totally robust toward dynamic changes to CPU topologies. Rather than storing the next CPU in the round-robin, just store the last one, and also return that value. This simplifies the loop drastically into a much more common pattern. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Reported-by: Linus Torvalds Tested-by: Manuel Leiner Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireguard/queueing.c | 1 + drivers/net/wireguard/queueing.h | 25 +++++++++++-------------- drivers/net/wireguard/receive.c | 2 +- drivers/net/wireguard/send.c | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 8084e7408c0a..26d235d15235 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -28,6 +28,7 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, int ret; memset(queue, 0, sizeof(*queue)); + queue->last_cpu = -1; ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); if (ret) return ret; diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index e2388107f7fd..a2e702f8c582 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -119,20 +119,17 @@ static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) return cpu; } -/* This function is racy, in the sense that next is unlocked, so it could return - * the same CPU twice. A race-free version of this would be to instead store an - * atomic sequence number, do an increment-and-return, and then iterate through - * every possible CPU until we get to that index -- choose_cpu. However that's - * a bit slower, and it doesn't seem like this potential race actually - * introduces any performance loss, so we live with it. +/* This function is racy, in the sense that it's called while last_cpu is + * unlocked, so it could return the same CPU twice. Adding locking or using + * atomic sequence numbers is slower though, and the consequences of racing are + * harmless, so live with it. */ -static inline int wg_cpumask_next_online(int *next) +static inline int wg_cpumask_next_online(int *last_cpu) { - int cpu = *next; - - while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) - cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; - *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + int cpu = cpumask_next(*last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + *last_cpu = cpu; return cpu; } @@ -161,7 +158,7 @@ static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) static inline int wg_queue_enqueue_per_device_and_peer( struct crypt_queue *device_queue, struct prev_queue *peer_queue, - struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) + struct sk_buff *skb, struct workqueue_struct *wq) { int cpu; @@ -175,7 +172,7 @@ static inline int wg_queue_enqueue_per_device_and_peer( /* Then we queue it up in the device queue, which consumes the * packet as soon as it can. */ - cpu = wg_cpumask_next_online(next_cpu); + cpu = wg_cpumask_next_online(&device_queue->last_cpu); if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) return -EPIPE; queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7b8df406c773..f500aaf67837 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -531,7 +531,7 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) goto err; ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, - wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); + wg->packet_crypt_wq); if (unlikely(ret == -EPIPE)) wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); if (likely(!ret || ret == -EPIPE)) { diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index 5368f7c35b4b..95c853b59e1d 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -318,7 +318,7 @@ static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) goto err; ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, - wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); + wg->packet_crypt_wq); if (unlikely(ret == -EPIPE)) wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); err: From 999f3b6104ed6ea7376a0f2bcabd5709813c3549 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 3 Jul 2023 03:27:05 +0200 Subject: [PATCH 327/509] wireguard: netlink: send staged packets when setting initial private key commit f58d0a9b4c6a7a5199c3af967e43cc8b654604d4 upstream. Packets bound for peers can queue up prior to the device private key being set. For example, if persistent keepalive is set, a packet is queued up to be sent as soon as the device comes up. However, if the private key hasn't been set yet, the handshake message never sends, and no timer is armed to retry, since that would be pointless. But, if a user later sets a private key, the expectation is that those queued packets, such as a persistent keepalive, are actually sent. So adjust the configuration logic to account for this edge case, and add a test case to make sure this works. Maxim noticed this with a wg-quick(8) config to the tune of: [Interface] PostUp = wg set %i private-key somefile [Peer] PublicKey = ... Endpoint = ... PersistentKeepalive = 25 Here, the private key gets set after the device comes up using a PostUp script, triggering the bug. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Cc: stable@vger.kernel.org Reported-by: Maxim Cournoyer Tested-by: Maxim Cournoyer Link: https://lore.kernel.org/wireguard/87fs7xtqrv.fsf@gmail.com/ Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireguard/netlink.c | 14 ++++++---- tools/testing/selftests/wireguard/netns.sh | 30 +++++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index 5c804bcabfe6..f5bc279c9a8c 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -546,6 +546,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); u8 public_key[NOISE_PUBLIC_KEY_LEN]; struct wg_peer *peer, *temp; + bool send_staged_packets; if (!crypto_memneq(wg->static_identity.static_private, private_key, NOISE_PUBLIC_KEY_LEN)) @@ -564,14 +565,17 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) } down_write(&wg->static_identity.lock); - wg_noise_set_static_identity_private_key(&wg->static_identity, - private_key); - list_for_each_entry_safe(peer, temp, &wg->peer_list, - peer_list) { + send_staged_packets = !wg->static_identity.has_identity && netif_running(wg->dev); + wg_noise_set_static_identity_private_key(&wg->static_identity, private_key); + send_staged_packets = send_staged_packets && wg->static_identity.has_identity; + + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { wg_noise_precompute_static_static(peer); wg_noise_expire_current_peer_keypairs(peer); + if (send_staged_packets) + wg_packet_send_staged_packets(peer); } - wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); up_write(&wg->static_identity.lock); } skip_set_private_key: diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 8a9461aa0878..93e44410f170 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -502,10 +502,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' n1 ping -W 1 -c 1 192.168.241.2 [[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] -ip1 link del veth1 -ip1 link del veth3 -ip1 link del wg0 -ip2 link del wg0 +ip1 link del dev veth3 +ip1 link del dev wg0 +ip2 link del dev wg0 + +# Make sure persistent keep alives are sent when an adapter comes up +ip1 link add dev wg0 type wireguard +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev wg0 +# This should also happen even if the private key is set later +ip1 link add dev wg0 type wireguard +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +n1 wg set wg0 private-key <(echo "$key1") +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev veth1 +ip1 link del dev wg0 # We test that Netlink/IPC is working properly by doing things that usually cause split responses ip0 link add dev wg0 type wireguard From 83be9fd7843c12b8a8d79a0267b004b3909b19e0 Mon Sep 17 00:00:00 2001 From: Sherry Sun Date: Mon, 19 Jun 2023 16:06:13 +0800 Subject: [PATCH 328/509] tty: serial: fsl_lpuart: add earlycon for imx8ulp platform commit e0edfdc15863ec80a1d9ac6e174dbccc00206dd0 upstream. Add earlycon support for imx8ulp platform. Signed-off-by: Sherry Sun Cc: stable Link: https://lore.kernel.org/r/20230619080613.16522-1-sherry.sun@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index ca22a1125821..a4cf00756681 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2589,6 +2589,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup); +OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8ulp-lpuart", lpuart32_imx_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8qxp-lpuart", lpuart32_imx_early_console_setup); EARLYCON_DECLARE(lpuart, lpuart_early_console_setup); EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup); From 058f077d09ba09095eed4ae65e6abfe35dfe38e9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Jul 2023 00:47:09 +0000 Subject: [PATCH 329/509] rcu-tasks: Mark ->trc_reader_nesting data races [ Upstream commit bdb0cca0d11060fce8a8a44588ac1470c25d62bc ] There are several ->trc_reader_nesting data races that are too low-probability for KCSAN to notice, but which will happen sooner or later. This commit therefore marks these accesses, and comments one that cannot race. Cc: # 5.10.x Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tasks.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index c66d47685b28..2b16a86163af 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -848,7 +848,7 @@ static void trc_read_check_handler(void *t_in) // If the task is not in a read-side critical section, and // if this is the last reader, awaken the grace-period kthread. - if (likely(!t->trc_reader_nesting)) { + if (likely(!READ_ONCE(t->trc_reader_nesting))) { if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) wake_up(&trc_wait); // Mark as checked after decrement to avoid false @@ -857,7 +857,7 @@ static void trc_read_check_handler(void *t_in) goto reset_ipi; } // If we are racing with an rcu_read_unlock_trace(), try again later. - if (unlikely(t->trc_reader_nesting < 0)) { + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) wake_up(&trc_wait); goto reset_ipi; @@ -904,6 +904,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) n_heavy_reader_ofl_updates++; in_qs = true; } else { + // The task is not running, so C-language access is safe. in_qs = likely(!t->trc_reader_nesting); } @@ -936,7 +937,7 @@ static void trc_wait_for_one_reader(struct task_struct *t, // The current task had better be in a quiescent state. if (t == current) { t->trc_reader_checked = true; - WARN_ON_ONCE(t->trc_reader_nesting); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); return; } @@ -1046,7 +1047,7 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) ".I"[READ_ONCE(t->trc_ipi_to_cpu) > 0], ".i"[is_idle_task(t)], ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], - t->trc_reader_nesting, + READ_ONCE(t->trc_reader_nesting), " N"[!!t->trc_reader_special.b.need_qs], cpu); sched_show_task(t); @@ -1141,7 +1142,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) static void exit_tasks_rcu_finish_trace(struct task_struct *t) { WRITE_ONCE(t->trc_reader_checked, true); - WARN_ON_ONCE(t->trc_reader_nesting); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); WRITE_ONCE(t->trc_reader_nesting, 0); if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs))) rcu_read_unlock_trace_special(t, 0); From 3a64cd01cdd6bea5b97cdb914cd2462fa8e2d47a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Jul 2023 00:47:10 +0000 Subject: [PATCH 330/509] rcu-tasks: Mark ->trc_reader_special.b.need_qs data races [ Upstream commit f8ab3fad80dddf3f2cecb53983063c4431058ca1 ] There are several ->trc_reader_special.b.need_qs data races that are too low-probability for KCSAN to notice, but which will happen sooner or later. This commit therefore marks these accesses. Cc: # 5.10.x Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tasks.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 2b16a86163af..aef2e96c3854 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -801,7 +801,7 @@ static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw); /* If we are the last reader, wake up the grace-period kthread. */ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting) { - int nq = t->trc_reader_special.b.need_qs; + int nq = READ_ONCE(t->trc_reader_special.b.need_qs); if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && t->trc_reader_special.b.need_mb) @@ -867,7 +867,7 @@ static void trc_read_check_handler(void *t_in) // Get here if the task is in a read-side critical section. Set // its state so that it will awaken the grace-period kthread upon // exit from that critical section. - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); reset_ipi: @@ -919,7 +919,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg) // state so that it will awaken the grace-period kthread upon exit // from that critical section. atomic_inc(&trc_n_readers_need_end); // One more to wait on. - WARN_ON_ONCE(t->trc_reader_special.b.need_qs); + WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); return true; } @@ -1048,7 +1048,7 @@ static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) ".i"[is_idle_task(t)], ".N"[cpu > 0 && tick_nohz_full_cpu(cpu)], READ_ONCE(t->trc_reader_nesting), - " N"[!!t->trc_reader_special.b.need_qs], + " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)], cpu); sched_show_task(t); } From 4f91de9a81bd389cd835211b19d5d1fca69f6acc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Jul 2023 00:47:11 +0000 Subject: [PATCH 331/509] rcu-tasks: Simplify trc_read_check_handler() atomic operations [ Upstream commit 96017bf9039763a2e02dcc6adaa18592cd73a39d ] Currently, trc_wait_for_one_reader() atomically increments the trc_n_readers_need_end counter before sending the IPI invoking trc_read_check_handler(). All failure paths out of trc_read_check_handler() and also from the smp_call_function_single() within trc_wait_for_one_reader() must carefully atomically decrement this counter. This is more complex than it needs to be. This commit therefore simplifies things and saves a few lines of code by dispensing with the atomic decrements in favor of having trc_read_check_handler() do the atomic increment only in the success case. In theory, this represents no change in functionality. Cc: # 5.10.x Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/tasks.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index aef2e96c3854..23101ebbbe1e 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -841,32 +841,24 @@ static void trc_read_check_handler(void *t_in) // If the task is no longer running on this CPU, leave. if (unlikely(texp != t)) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); goto reset_ipi; // Already on holdout list, so will check later. } // If the task is not in a read-side critical section, and // if this is the last reader, awaken the grace-period kthread. if (likely(!READ_ONCE(t->trc_reader_nesting))) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); - // Mark as checked after decrement to avoid false - // positives on the above WARN_ON_ONCE(). WRITE_ONCE(t->trc_reader_checked, true); goto reset_ipi; } // If we are racing with an rcu_read_unlock_trace(), try again later. - if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) { - if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end))) - wake_up(&trc_wait); + if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) goto reset_ipi; - } WRITE_ONCE(t->trc_reader_checked, true); // Get here if the task is in a read-side critical section. Set // its state so that it will awaken the grace-period kthread upon // exit from that critical section. + atomic_inc(&trc_n_readers_need_end); // One more to wait on. WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)); WRITE_ONCE(t->trc_reader_special.b.need_qs, true); @@ -960,21 +952,15 @@ static void trc_wait_for_one_reader(struct task_struct *t, if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) return; - atomic_inc(&trc_n_readers_need_end); per_cpu(trc_ipi_to_cpu, cpu) = true; t->trc_ipi_to_cpu = cpu; rcu_tasks_trace.n_ipis++; - if (smp_call_function_single(cpu, - trc_read_check_handler, t, 0)) { + if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { // Just in case there is some other reason for // failure than the target CPU being offline. rcu_tasks_trace.n_ipis_fails++; per_cpu(trc_ipi_to_cpu, cpu) = false; t->trc_ipi_to_cpu = cpu; - if (atomic_dec_and_test(&trc_n_readers_need_end)) { - WARN_ON_ONCE(1); - wake_up(&trc_wait); - } } } } From bb2f7e4bfe815150e63633aacd276fb88735e458 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Wed, 5 Jul 2023 11:38:08 +1200 Subject: [PATCH 332/509] block/partition: fix signedness issue for Amiga partitions commit 7eb1e47696aa231b1a567846bbe3a1e1befe1854 upstream. Making 'blk' sector_t (i.e. 64 bit if LBD support is active) fails the 'blk>0' test in the partition block loop if a value of (signed int) -1 is used to mark the end of the partition block list. Explicitly cast 'blk' to signed int to allow use of -1 to terminate the partition block linked list. Fixes: b6f3f28f604b ("block: add overflow checks for Amiga partition support") Reported-by: Christian Zigotzky Link: https://lore.kernel.org/r/024ce4fa-cc6d-50a2-9aae-3701d0ebf668@xenosoft.de Signed-off-by: Michael Schmitz Reviewed-by: Martin Steigerwald Tested-by: Christian Zigotzky Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/partitions/amiga.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 7abc09d8fc73..a99ec7f1a174 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -91,7 +91,7 @@ int amiga_partition(struct parsed_partitions *state) } blk = be32_to_cpu(rdb->rdb_PartitionList); put_dev_sector(sect); - for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { + for (part = 1; (s32) blk>0 && part<=16; part++, put_dev_sector(sect)) { /* Read in terms partition table understands */ if (check_mul_overflow(blk, (sector_t) blksize, &blk)) { pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n", From 297883bbcab1b48e97aeb8f1dd3dd99d13238a44 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 16 Jul 2023 12:07:03 -0600 Subject: [PATCH 333/509] io_uring: Use io_schedule* in cqring wait Commit 8a796565cec3601071cbbd27d6304e202019d014 upstream. I observed poor performance of io_uring compared to synchronous IO. That turns out to be caused by deeper CPU idle states entered with io_uring, due to io_uring using plain schedule(), whereas synchronous IO uses io_schedule(). The losses due to this are substantial. On my cascade lake workstation, t/io_uring from the fio repository e.g. yields regressions between 20% and 40% with the following command: ./t/io_uring -r 5 -X0 -d 1 -s 1 -c 1 -p 0 -S$use_sync -R 0 /mnt/t2/fio/write.0.0 This is repeatable with different filesystems, using raw block devices and using different block devices. Use io_schedule_prepare() / io_schedule_finish() in io_cqring_wait_schedule() to address the difference. After that using io_uring is on par or surpassing synchronous IO (using registered files etc makes it reliably win, but arguably is a less fair comparison). There are other calls to schedule() in io_uring/, but none immediately jump out to be similarly situated, so I did not touch them. Similarly, it's possible that mutex_lock_io() should be used, but it's not clear if there are cases where that matters. Cc: stable@vger.kernel.org # 5.10+ Cc: Pavel Begunkov Cc: io-uring@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Andres Freund Link: https://lore.kernel.org/r/20230707162007.194068-1-andres@anarazel.de [axboe: minor style fixup] Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 25f201fc3ffd..3d5fb465eed7 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -7625,7 +7625,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, ktime_t *timeout) { - int ret; + int token, ret; /* make sure we run task_work before checking for signals */ ret = io_run_task_work_sig(); @@ -7635,9 +7635,17 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (test_bit(0, &ctx->check_cq_overflow)) return 1; + /* + * Use io_schedule_prepare/finish, so cpufreq can take into account + * that the task is waiting for IO - turns out to be important for low + * QD IO. + */ + token = io_schedule_prepare(); + ret = 1; if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) - return -ETIME; - return 1; + ret = -ETIME; + io_schedule_finish(token); + return ret; } /* From dc4a25fa75659515fd61980d77e071b406db73b0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 17 Jul 2023 10:27:20 -0600 Subject: [PATCH 334/509] io_uring: add reschedule point to handle_tw_list() Commit f58680085478dd292435727210122960d38e8014 upstream. If CONFIG_PREEMPT_NONE is set and the task_work chains are long, we could be running into issues blocking others for too long. Add a reschedule check in handle_tw_list(), and flush the ctx if we need to reschedule. Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3d5fb465eed7..51e6ebe72caf 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2214,9 +2214,12 @@ static void tctx_task_work(struct callback_head *cb) } req->io_task_work.func(req, &locked); node = next; + if (unlikely(need_resched())) { + ctx_flush_and_put(ctx, &locked); + ctx = NULL; + cond_resched(); + } } while (node); - - cond_resched(); } ctx_flush_and_put(ctx, &locked); From fc359e5b45da6ef1c40436cecc18b201dce95df8 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 27 Jun 2023 03:50:00 +0000 Subject: [PATCH 335/509] net: lan743x: Don't sleep in atomic context commit 7a8227b2e76be506b2ac64d2beac950ca04892a5 upstream. dev_set_rx_mode() grabs a spin_lock, and the lan743x implementation proceeds subsequently to go to sleep using readx_poll_timeout(). Introduce a helper wrapping the readx_poll_timeout_atomic() function and use it to replace the calls to readx_polL_timeout(). Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Cc: stable@vger.kernel.org Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Signed-off-by: Moritz Fischer Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230627035000.1295254-1-moritzf@google.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microchip/lan743x_main.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 481f89d193f7..50cb1c5251f7 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -83,6 +83,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter) !(data & HW_CFG_LRST_), 100000, 10000000); } +static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter, + int offset, u32 bit_mask, + int target_value, int udelay_min, + int udelay_max, int count) +{ + u32 data; + + return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data, + target_value == !!(data & bit_mask), + udelay_max, udelay_min * count); +} + static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, int offset, u32 bit_mask, int target_value, int usleep_min, @@ -678,8 +690,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 dp_sel; int i; - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; @@ -690,8 +702,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_ADDR, addr + i); lan743x_csr_write(adapter, DP_DATA_0, buf[i]); lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, + DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; } From 13c353dc5c2e6c0412cd6009218af0ddc8fcd81e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Jun 2023 12:08:14 -0700 Subject: [PATCH 336/509] workqueue: clean up WORK_* constant types, clarify masking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit afa4bb778e48d79e4a642ed41e3b4e0de7489a6c upstream. Dave Airlie reports that gcc-13.1.1 has started complaining about some of the workqueue code in 32-bit arm builds: kernel/workqueue.c: In function ā€˜get_work_pwqā€™: kernel/workqueue.c:713:24: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 713 | return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); | ^ [ ... a couple of other cases ... ] and while it's not immediately clear exactly why gcc started complaining about it now, I suspect it's some C23-induced enum type handlign fixup in gcc-13 is the cause. Whatever the reason for starting to complain, the code and data types are indeed disgusting enough that the complaint is warranted. The wq code ends up creating various "helper constants" (like that WORK_STRUCT_WQ_DATA_MASK) using an enum type, which is all kinds of confused. The mask needs to be 'unsigned long', not some unspecified enum type. To make matters worse, the actual "mask and cast to a pointer" is repeated a couple of times, and the cast isn't even always done to the right pointer, but - as the error case above - to a 'void *' with then the compiler finishing the job. That's now how we roll in the kernel. So create the masks using the proper types rather than some ambiguous enumeration, and use a nice helper that actually does the type conversion in one well-defined place. Incidentally, this magically makes clang generate better code. That, admittedly, is really just a sign of clang having been seriously confused before, and cleaning up the typing unconfuses the compiler too. Reported-by: Dave Airlie Link: https://lore.kernel.org/lkml/CAPM=9twNnV4zMCvrPkw3H-ajZOH-01JVh_kDrxdPYQErz8ZTdA@mail.gmail.com/ Cc: Arnd Bergmann Cc: Tejun Heo Cc: Nick Desaulniers Cc: Nathan Chancellor Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 15 ++++++++------- kernel/workqueue.c | 13 ++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2fa9b311e566..460c58fa011a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -73,7 +73,6 @@ enum { WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last @@ -84,12 +83,6 @@ enum { WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, - - /* convenience constants */ - WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, - WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -99,6 +92,14 @@ enum { WORKER_DESC_LEN = 24, }; +/* Convenience constants - of type 'unsigned long', not 'enum'! */ +#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) +#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) + +#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) +#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) + struct work_struct { atomic_long_t data; struct list_head entry; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b9041ab881bc..fa0a0e59b385 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -679,12 +679,17 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_POOL, 0); } +static inline struct pool_workqueue *work_struct_pwq(unsigned long data) +{ + return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK); +} + static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); + return work_struct_pwq(data); else return NULL; } @@ -712,8 +717,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool; + return work_struct_pwq(data)->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; if (pool_id == WORK_OFFQ_POOL_NONE) @@ -734,8 +738,7 @@ static int get_work_pool_id(struct work_struct *work) unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + return work_struct_pwq(data)->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; } From 547ab8ea86c1e7d078845454afac6319029e2819 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 20 Jun 2023 08:22:02 -0300 Subject: [PATCH 337/509] drm/panel: simple: Add connector_type for innolux_at043tn24 [ Upstream commit 2c56a751845ddfd3078ebe79981aaaa182629163 ] The innolux at043tn24 display is a parallel LCD. Pass the 'connector_type' information to avoid the following warning: panel-simple panel: Specify missing connector_type Signed-off-by: Fabio Estevam Fixes: 41bcceb4de9c ("drm/panel: simple: Add support for Innolux AT043TN24") Reviewed-by: Sam Ravnborg Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230620112202.654981-1-festevam@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index b0b92f436879..ffda99c20435 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2091,6 +2091,7 @@ static const struct panel_desc innolux_at043tn24 = { .height = 54, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .connector_type = DRM_MODE_CONNECTOR_DPI, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, }; From 2a587b71c532977498fdd5d19b3384442a0057b7 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 15 Jun 2023 22:16:02 +0200 Subject: [PATCH 338/509] drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags [ Upstream commit 1c519980aced3da1fae37c1339cf43b24eccdee7 ] Add missing drm_display_mode DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC flags. Those are used by various bridges in the pipeline to correctly configure its sync signals polarity. Fixes: d69de69f2be1 ("drm/panel: simple: Add Powertip PH800480T013 panel") Signed-off-by: Marek Vasut Reviewed-by: Sam Ravnborg Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230615201602.565948-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index ffda99c20435..7b69f81444eb 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3153,6 +3153,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = { .vsync_start = 480 + 49, .vsync_end = 480 + 49 + 2, .vtotal = 480 + 49 + 2 + 22, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, }; static const struct panel_desc powertip_ph800480t013_idf02 = { From 3d4bba694aed2b10be5f925a46a3ffe40d904bdc Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Wed, 17 May 2023 08:18:12 +0800 Subject: [PATCH 339/509] igc: Remove delay during TX ring configuration [ Upstream commit cca28ceac7c7857bc2d313777017585aef00bcc4 ] Remove unnecessary delay during the TX ring configuration. This will cause delay, especially during link down and link up activity. Furthermore, old SKUs like as I225 will call the reset_adapter to reset the controller during TSN mode Gate Control List (GCL) setting. This will add more time to the configuration of the real-time use case. It doesn't mentioned about this delay in the Software User Manual. It might have been ported from legacy code I210 in the past. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index a15e4b6d7fa4..2b51ee87a2de 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -600,7 +600,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); From de6e6b07974c45141835ca1176ba8bc9f59346f2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 28 Jun 2023 08:59:34 +0800 Subject: [PATCH 340/509] net/mlx5e: fix double free in mlx5e_destroy_flow_table [ Upstream commit 884abe45a9014d0de2e6edb0630dfd64f23f1d1b ] In function accel_fs_tcp_create_groups(), when the ft->g memory is successfully allocated but the 'in' memory fails to be allocated, the memory pointed to by ft->g is released once. And in function accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release the memory pointed to by ft->g again. This will cause double free problem. Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows") Signed-off-by: Zhengchao Shao Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index e51f60b55daa..2da90f6649d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -194,6 +194,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From 30c281a77fb1b2d362030ea243dd663201d62a21 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 8 Jun 2023 09:32:10 +0200 Subject: [PATCH 341/509] net/mlx5e: Check for NOT_READY flag state after locking [ Upstream commit 65e64640e97c0f223e77f9ea69b5a46186b93470 ] Currently the check for NOT_READY flag is performed before obtaining the necessary lock. This opens a possibility for race condition when the flow is concurrently removed from unready_flows list by the workqueue task, which causes a double-removal from the list and a crash[0]. Fix the issue by moving the flag check inside the section protected by uplink_priv->unready_flows_lock mutex. [0]: [44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP [44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1 [44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [44376.406339] Call Trace: [44376.406651] [44376.406939] ? die_addr+0x33/0x90 [44376.407311] ? exc_general_protection+0x192/0x390 [44376.407795] ? asm_exc_general_protection+0x22/0x30 [44376.408292] ? mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core] [44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core] [44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core] [44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core] [44376.411043] tc_setup_cb_reoffload+0x22/0x80 [44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower] [44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.413044] tcf_block_playback_offloads+0x76/0x170 [44376.413497] tcf_block_unbind+0x7b/0xd0 [44376.413881] tcf_block_setup+0x17d/0x1c0 [44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130 [44376.414725] tcf_block_offload_unbind+0x43/0x70 [44376.415153] __tcf_block_put+0x82/0x150 [44376.415532] ingress_destroy+0x22/0x30 [sch_ingress] [44376.415986] qdisc_destroy+0x3b/0xd0 [44376.416343] qdisc_graft+0x4d0/0x620 [44376.416706] tc_get_qdisc+0x1c9/0x3b0 [44376.417074] rtnetlink_rcv_msg+0x29c/0x390 [44376.419978] ? rep_movs_alternative+0x3a/0xa0 [44376.420399] ? rtnl_calcit.isra.0+0x120/0x120 [44376.420813] netlink_rcv_skb+0x54/0x100 [44376.421192] netlink_unicast+0x1f6/0x2c0 [44376.421573] netlink_sendmsg+0x232/0x4a0 [44376.421980] sock_sendmsg+0x38/0x60 [44376.422328] ____sys_sendmsg+0x1d0/0x1e0 [44376.422709] ? copy_msghdr_from_user+0x6d/0xa0 [44376.423127] ___sys_sendmsg+0x80/0xc0 [44376.423495] ? ___sys_recvmsg+0x8b/0xc0 [44376.423869] __sys_sendmsg+0x51/0x90 [44376.424226] do_syscall_64+0x3d/0x90 [44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [44376.425046] RIP: 0033:0x7f045134f887 [44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887 [44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003 [44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001 [44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [44376.430644] [44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] [44376.433936] ---[ end trace 0000000000000000 ]--- [44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 16846442717d..c6a81a51530d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1334,7 +1334,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) uplink_priv = &rpriv->uplink_priv; mutex_lock(&uplink_priv->unready_flows_lock); - unready_flow_del(flow); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); mutex_unlock(&uplink_priv->unready_flows_lock); } @@ -1475,8 +1476,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) - remove_unready_flow(flow); + remove_unready_flow(flow); if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) From b687b7836157d3a8afbe778957f7199003d7460f Mon Sep 17 00:00:00 2001 From: Prasad Koya Date: Mon, 5 Jun 2023 11:09:01 -0700 Subject: [PATCH 342/509] igc: set TP bit in 'supported' and 'advertising' fields of ethtool_link_ksettings [ Upstream commit 9ac3fc2f42e5ffa1e927dcbffb71b15fa81459e2 ] set TP bit in the 'supported' and 'advertising' fields. i225/226 parts only support twisted pair copper. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Prasad Koya Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index da259cd59add..d28ac3a025ab 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1673,6 +1673,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) From c175603d84d3b83e180204b48cda15514e395e22 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 26 Jun 2023 13:58:47 +0300 Subject: [PATCH 343/509] scsi: qla2xxx: Fix error code in qla2x00_start_sp() [ Upstream commit e579b007eff3ff8d29d59d16214cd85fb9e573f7 ] This should be negative -EAGAIN instead of positive. The callers treat non-zero error codes the same so it doesn't really impact runtime beyond some trivial differences to debug output. Fixes: 80676d054e5a ("scsi: qla2xxx: Fix session cleanup hang") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/49866d28-4cfe-47b0-842b-78f110e61aab@moroto.mountain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_iocb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index e54cc2a761dd..f0af76c3de7e 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3713,7 +3713,7 @@ qla2x00_start_sp(srb_t *sp) spin_lock_irqsave(qp->qp_lock_ptr, flags); pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); if (!pkt) { - rval = EAGAIN; + rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, "qla2x00_alloc_iocbs failed.\n"); goto done; From d341f246123e144b3d0ce908d254dee5664f2ff4 Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 5 Jul 2023 07:37:12 +0200 Subject: [PATCH 344/509] net: mvneta: fix txq_map in case of txq_number==1 [ Upstream commit 21327f81db6337c8843ce755b01523c7d3df715b ] If we boot with mvneta.txq_number=1, the txq_map is set incorrectly: MVNETA_CPU_TXQ_ACCESS(1) refers to TX queue 1, but only TX queue 0 is initialized. Fix this. Fixes: 50bf8cb6fc9c ("net: mvneta: Configure XPS support") Signed-off-by: Klaus Kudielka Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/20230705053712.3914-1-klaus.kudielka@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f5567d485e91..3656a3937eca 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1471,7 +1471,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == pp->rxq_def) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; } else { txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; @@ -4165,7 +4165,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == elected_cpu) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; else txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & MVNETA_CPU_TXQ_ACCESS_ALL_MASK; From 80e0e8d5f54397c5048fa2274144134dd9dc91b5 Mon Sep 17 00:00:00 2001 From: M A Ramdhan Date: Wed, 5 Jul 2023 12:15:30 -0400 Subject: [PATCH 345/509] net/sched: cls_fw: Fix improper refcount update leads to use-after-free [ Upstream commit 0323bce598eea038714f941ce2b22541c46d488f ] In the event of a failure in tcf_change_indev(), fw_set_parms() will immediately return an error after incrementing or decrementing reference counter in tcf_bind_filter(). If attacker can control reference counter to zero and make reference freed, leading to use after free. In order to prevent this, move the point of possible failure above the point where the TC_FW_CLASSID is handled. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: M A Ramdhan Signed-off-by: M A Ramdhan Acked-by: Jamal Hadi Salim Reviewed-by: Pedro Tammela Message-ID: <20230705161530.52003-1-ramdhan@starlabs.sg> Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_fw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ec945294626a..41f0898a5a56 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -210,11 +210,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, if (err < 0) return err; - if (tb[TCA_FW_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); - tcf_bind_filter(tp, &f->res, base); - } - if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); @@ -231,6 +226,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, } else if (head->mask != 0xFFFFFFFF) return err; + if (tb[TCA_FW_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + return 0; } From 9085429821b442f32d93b4af67d7adb2bb881820 Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Thu, 6 Jul 2023 12:41:28 +0800 Subject: [PATCH 346/509] gve: Set default duplex configuration to full [ Upstream commit 0503efeadbf6bb8bf24397613a73b67e665eac5f ] Current duplex mode was unset in the driver, resulting in the default parameter being set to 0, which corresponds to half duplex. It might mislead users to have incorrect expectation about the driver's transmission capabilities. Set the default duplex configuration to full, as the driver runs in full duplex mode at this point. Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") Signed-off-by: Junfeng Guo Reviewed-by: Leon Romanovsky Message-ID: <20230706044128.2726747-1-junfeng.guo@intel.com> Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index e0449cc24fbd..cbfd00744935 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -516,6 +516,9 @@ static int gve_get_link_ksettings(struct net_device *netdev, err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; + + cmd->base.duplex = DUPLEX_FULL; + return err; } From f8cc4fd99a325505e15c3da95d6de266efd3d9b5 Mon Sep 17 00:00:00 2001 From: Nitya Sunkad Date: Thu, 6 Jul 2023 11:20:06 -0700 Subject: [PATCH 347/509] ionic: remove WARN_ON to prevent panic_on_warn [ Upstream commit abfb2a58a5377ebab717d4362d6180f901b6e5c1 ] Remove unnecessary early code development check and the WARN_ON that it uses. The irq alloc and free paths have long been cleaned up and this check shouldn't have stuck around so long. Fixes: 77ceb68e29cc ("ionic: Add notifyq support") Signed-off-by: Nitya Sunkad Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index fcd4213c99b8..098772601df8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -433,11 +433,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif) static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, struct ionic_qcq *n_qcq) { - if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { - ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index); - n_qcq->flags &= ~IONIC_QCQ_F_INTR; - } - n_qcq->intr.vector = src_qcq->intr.vector; n_qcq->intr.index = src_qcq->intr.index; } From cddd04f341245949258d304f0bc961d298560f61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 7 Jul 2023 08:53:25 +0200 Subject: [PATCH 348/509] net: bgmac: postpone turning IRQs off to avoid SoC hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7731194fdf085f46d58b1adccfddbd0dfee4873 ] Turning IRQs off is done by accessing Ethernet controller registers. That can't be done until device's clock is enabled. It results in a SoC hang otherwise. This bug remained unnoticed for years as most bootloaders keep all Ethernet interfaces turned on. It seems to only affect a niche SoC family BCM47189. It has two Ethernet controllers but CFE bootloader uses only the first one. Fixes: 34322615cbaa ("net: bgmac: Mask interrupts during probe") Signed-off-by: Rafał Miłecki Reviewed-by: Michal Kubiak Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index bb999e67d773..ab8ee9331635 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac) bgmac->in_init = true; - bgmac_chip_intrs_off(bgmac); - net_dev->irq = bgmac->irq; SET_NETDEV_DEV(net_dev, bgmac->dev); dev_set_drvdata(bgmac->dev, bgmac); @@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) */ bgmac_clk_enable(bgmac, 0); + bgmac_chip_intrs_off(bgmac); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) From bc3ab5d2ab69823f5cff89cf74ef78ffa0386c9a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Jul 2023 10:11:10 +0200 Subject: [PATCH 349/509] net: prevent skb corruption on frag list segmentation [ Upstream commit c329b261afe71197d9da83c1f18eb45a7e97e089 ] Ian reported several skb corruptions triggered by rx-gro-list, collecting different oops alike: [ 62.624003] BUG: kernel NULL pointer dereference, address: 00000000000000c0 [ 62.631083] #PF: supervisor read access in kernel mode [ 62.636312] #PF: error_code(0x0000) - not-present page [ 62.641541] PGD 0 P4D 0 [ 62.644174] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 62.648629] CPU: 1 PID: 913 Comm: napi/eno2-79 Not tainted 6.4.0 #364 [ 62.655162] Hardware name: Supermicro Super Server/A2SDi-12C-HLN4F, BIOS 1.7a 10/13/2022 [ 62.663344] RIP: 0010:__udp_gso_segment (./include/linux/skbuff.h:2858 ./include/linux/udp.h:23 net/ipv4/udp_offload.c:228 net/ipv4/udp_offload.c:261 net/ipv4/udp_offload.c:277) [ 62.687193] RSP: 0018:ffffbd3a83b4f868 EFLAGS: 00010246 [ 62.692515] RAX: 00000000000000ce RBX: 0000000000000000 RCX: 0000000000000000 [ 62.699743] RDX: ffffa124def8a000 RSI: 0000000000000079 RDI: ffffa125952a14d4 [ 62.706970] RBP: ffffa124def8a000 R08: 0000000000000022 R09: 00002000001558c9 [ 62.714199] R10: 0000000000000000 R11: 00000000be554639 R12: 00000000000000e2 [ 62.721426] R13: ffffa125952a1400 R14: ffffa125952a1400 R15: 00002000001558c9 [ 62.728654] FS: 0000000000000000(0000) GS:ffffa127efa40000(0000) knlGS:0000000000000000 [ 62.736852] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 62.742702] CR2: 00000000000000c0 CR3: 00000001034b0000 CR4: 00000000003526e0 [ 62.749948] Call Trace: [ 62.752498] [ 62.779267] inet_gso_segment (net/ipv4/af_inet.c:1398) [ 62.787605] skb_mac_gso_segment (net/core/gro.c:141) [ 62.791906] __skb_gso_segment (net/core/dev.c:3403 (discriminator 2)) [ 62.800492] validate_xmit_skb (./include/linux/netdevice.h:4862 net/core/dev.c:3659) [ 62.804695] validate_xmit_skb_list (net/core/dev.c:3710) [ 62.809158] sch_direct_xmit (net/sched/sch_generic.c:330) [ 62.813198] __dev_queue_xmit (net/core/dev.c:3805 net/core/dev.c:4210) net/netfilter/core.c:626) [ 62.821093] br_dev_queue_push_xmit (net/bridge/br_forward.c:55) [ 62.825652] maybe_deliver (net/bridge/br_forward.c:193) [ 62.829420] br_flood (net/bridge/br_forward.c:233) [ 62.832758] br_handle_frame_finish (net/bridge/br_input.c:215) [ 62.837403] br_handle_frame (net/bridge/br_input.c:298 net/bridge/br_input.c:416) [ 62.851417] __netif_receive_skb_core.constprop.0 (net/core/dev.c:5387) [ 62.866114] __netif_receive_skb_list_core (net/core/dev.c:5570) [ 62.871367] netif_receive_skb_list_internal (net/core/dev.c:5638 net/core/dev.c:5727) [ 62.876795] napi_complete_done (./include/linux/list.h:37 ./include/net/gro.h:434 ./include/net/gro.h:429 net/core/dev.c:6067) [ 62.881004] ixgbe_poll (drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:3191) [ 62.893534] __napi_poll (net/core/dev.c:6498) [ 62.897133] napi_threaded_poll (./include/linux/netpoll.h:89 net/core/dev.c:6640) [ 62.905276] kthread (kernel/kthread.c:379) [ 62.913435] ret_from_fork (arch/x86/entry/entry_64.S:314) [ 62.917119] In the critical scenario, rx-gro-list GRO-ed packets are fed, via a bridge, both to the local input path and to an egress device (tun). The segmentation of such packets unsafely writes to the cloned skbs with shared heads. This change addresses the issue by uncloning as needed the to-be-segmented skbs. Reported-by: Ian Kumlien Tested-by: Ian Kumlien Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/skbuff.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e203172b9b9e..b10285d06a2c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3685,6 +3685,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(skb, -skb_network_offset(skb) + offset); + /* Ensure the head is writeable before touching the shared info */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto err_linearize; + skb_shinfo(skb)->frag_list = NULL; while (list_skb) { From d30ddd7ff15df9d91a793ce3f06f0190ff7afacc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Jul 2023 18:43:27 -0700 Subject: [PATCH 350/509] icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in icmp6_dev(). [ Upstream commit 2aaa8a15de73874847d62eb595c6683bface80fd ] With some IPv6 Ext Hdr (RPL, SRv6, etc.), we can send a packet that has the link-local address as src and dst IP and will be forwarded to an external IP in the IPv6 Ext Hdr. For example, the script below generates a packet whose src IP is the link-local address and dst is updated to 11::. # for f in $(find /proc/sys/net/ -name *seg6_enabled*); do echo 1 > $f; done # python3 >>> from socket import * >>> from scapy.all import * >>> >>> SRC_ADDR = DST_ADDR = "fe80::5054:ff:fe12:3456" >>> >>> pkt = IPv6(src=SRC_ADDR, dst=DST_ADDR) >>> pkt /= IPv6ExtHdrSegmentRouting(type=4, addresses=["11::", "22::"], segleft=1) >>> >>> sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW) >>> sk.sendto(bytes(pkt), (DST_ADDR, 0)) For such a packet, we call ip6_route_input() to look up a route for the next destination in these three functions depending on the header type. * ipv6_rthdr_rcv() * ipv6_rpl_srh_rcv() * ipv6_srh_rcv() If no route is found, ip6_null_entry is set to skb, and the following dst_input(skb) calls ip6_pkt_drop(). Finally, in icmp6_dev(), we dereference skb_rt6_info(skb)->rt6i_idev->dev as the input device is the loopback interface. Then, we have to check if skb_rt6_info(skb)->rt6i_idev is NULL or not to avoid NULL pointer deref for ip6_null_entry. BUG: kernel NULL pointer dereference, address: 0000000000000000 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 157 Comm: python3 Not tainted 6.4.0-11996-gb121d614371c #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ip6_pkt_drop (net/ipv6/route.c:4513) ipv6_rthdr_rcv (net/ipv6/exthdrs.c:640 net/ipv6/exthdrs.c:686) ip6_protocol_deliver_rcu (net/ipv6/ip6_input.c:437 (discriminator 5)) ip6_input_finish (./include/linux/rcupdate.h:781 net/ipv6/ip6_input.c:483) __netif_receive_skb_one_core (net/core/dev.c:5455) process_backlog (./include/linux/rcupdate.h:781 net/core/dev.c:5895) __napi_poll (net/core/dev.c:6460) net_rx_action (net/core/dev.c:6529 net/core/dev.c:6660) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) __local_bh_enable_ip (kernel/softirq.c:381) __dev_queue_xmit (net/core/dev.c:4231) ip6_finish_output2 (./include/net/neighbour.h:544 net/ipv6/ip6_output.c:135) rawv6_sendmsg (./include/net/dst.h:458 ./include/linux/netfilter.h:303 net/ipv6/raw.c:656 net/ipv6/raw.c:914) sock_sendmsg (net/socket.c:725 net/socket.c:748) __sys_sendto (net/socket.c:2134) __x64_sys_sendto (net/socket.c:2146 net/socket.c:2142 net/socket.c:2142) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7f9dc751baea Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffe98712c38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007ffe98712cf8 RCX: 00007f9dc751baea RDX: 0000000000000060 RSI: 00007f9dc6460b90 RDI: 0000000000000003 RBP: 00007f9dc56e8be0 R08: 00007ffe98712d70 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f9dc6af5d1b Modules linked in: CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 PKRU: 55555554 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address") Reported-by: Wang Yufen Closes: https://lore.kernel.org/netdev/c41403a9-c2f6-3b7e-0c96-e1901e605cd0@huawei.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/icmp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fd1f896115c1..d01165bb6a32 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -429,7 +429,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb) if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); - if (rt6) + /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), + * and ip6_null_entry could be set to skb if no route is found. + */ + if (rt6 && rt6->rt6i_idev) dev = rt6->rt6i_idev->dev; } From 3e8fed805cf3f8597bdfd0bea923f5a756fbe726 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 8 Jul 2023 08:29:58 +0000 Subject: [PATCH 351/509] udp6: fix udp6_ehashfn() typo [ Upstream commit 51d03e2f2203e76ed02d33fb5ffbb5fc85ffaf54 ] Amit Klein reported that udp6_ehash_secret was initialized but never used. Fixes: 1bbdceef1e53 ("inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once") Reported-by: Amit Klein Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Willem de Bruijn Cc: David Ahern Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 19c0721399d9..788bb19f32e9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -87,7 +87,7 @@ static u32 udp6_ehashfn(const struct net *net, fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, - udp_ipv6_hash_secret + net_hash_mix(net)); + udp6_ehash_secret + net_hash_mix(net)); } int udp_v6_get_port(struct sock *sk, unsigned short snum) From 0bb2683b0cde42cec4eae87a3ecc064b9714253e Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:01 +0000 Subject: [PATCH 352/509] ntb: idt: Fix error handling in idt_pci_driver_init() [ Upstream commit c012968259b451dc4db407f2310fe131eaefd800 ] A problem about ntb_hw_idt create debugfs failed is triggered with the following log given: [ 1236.637636] IDT PCI-E Non-Transparent Bridge Driver 2.0 [ 1236.639292] debugfs: Directory 'ntb_hw_idt' with parent '/' already present! The reason is that idt_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_idt can never be created later. idt_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support") Signed-off-by: Yuan Can Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/idt/ntb_hw_idt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index d54261f50851..99711dd0b6e8 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2902,6 +2902,7 @@ static struct pci_driver idt_pci_driver = { static int __init idt_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); /* Create the top DebugFS directory if the FS is initialized */ @@ -2909,7 +2910,11 @@ static int __init idt_pci_driver_init(void) dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); /* Register the NTB hardware driver to handle the PCI device */ - return pci_register_driver(&idt_pci_driver); + ret = pci_register_driver(&idt_pci_driver); + if (ret) + debugfs_remove_recursive(dbgfs_topdir); + + return ret; } module_init(idt_pci_driver_init); From 23e09f0a868f059b75d46b65770e4bab081bad94 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:09 +0000 Subject: [PATCH 353/509] NTB: amd: Fix error handling in amd_ntb_pci_driver_init() [ Upstream commit 98af0a33c1101c29b3ce4f0cf4715fd927c717f9 ] A problem about ntb_hw_amd create debugfs failed is triggered with the following log given: [ 618.431232] AMD(R) PCI-E Non-Transparent Bridge Driver 1.0 [ 618.433284] debugfs: Directory 'ntb_hw_amd' with parent '/' already present! The reason is that amd_ntb_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_amd can never be created later. amd_ntb_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: a1b3695820aa ("NTB: Add support for AMD PCI-Express Non-Transparent Bridge") Signed-off-by: Yuan Can Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/amd/ntb_hw_amd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 71428d8cbcfc..ac401ad7884a 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1344,12 +1344,17 @@ static struct pci_driver amd_ntb_pci_driver = { static int __init amd_ntb_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - return pci_register_driver(&amd_ntb_pci_driver); + ret = pci_register_driver(&amd_ntb_pci_driver); + if (ret) + debugfs_remove_recursive(debugfs_dir); + + return ret; } module_init(amd_ntb_pci_driver_init); From 03cfa0653406ee0bc615006129a63044c46263bc Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:22 +0000 Subject: [PATCH 354/509] ntb: intel: Fix error handling in intel_ntb_pci_driver_init() [ Upstream commit 4c3c796aca02883ad35bb117468938cc4022ca41 ] A problem about ntb_hw_intel create debugfs failed is triggered with the following log given: [ 273.112733] Intel(R) PCI-E Non-Transparent Bridge Driver 2.0 [ 273.115342] debugfs: Directory 'ntb_hw_intel' with parent '/' already present! The reason is that intel_ntb_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_intel can never be created later. intel_ntb_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Yuan Can Acked-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/intel/ntb_hw_gen1.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 093dd20057b9..4f1add57d81d 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -2068,12 +2068,17 @@ static struct pci_driver intel_ntb_pci_driver = { static int __init intel_ntb_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - return pci_register_driver(&intel_ntb_pci_driver); + ret = pci_register_driver(&intel_ntb_pci_driver); + if (ret) + debugfs_remove_recursive(debugfs_dir); + + return ret; } module_init(intel_ntb_pci_driver_init); From 41c6d8ff71cd93748153cd186ff713e23c6f5e2a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 23:19:17 +0800 Subject: [PATCH 355/509] NTB: ntb_transport: fix possible memory leak while device_register() fails [ Upstream commit 8623ccbfc55d962e19a3537652803676ad7acb90 ] If device_register() returns error, the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and client_dev is freed in ntb_transport_client_release(). Fixes: fce8a7bb5b4b ("PCI-Express Non-Transparent Bridge Support") Signed-off-by: Yang Yingliang Reviewed-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 4a02561cfb96..d18cb4476560 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -412,7 +412,7 @@ int ntb_transport_register_client_dev(char *device_name) rc = device_register(dev); if (rc) { - kfree(client_dev); + put_device(dev); goto err; } From 8271145523a5f6e71fd82ae706225a8bd8e6efc7 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 11:32:44 +0800 Subject: [PATCH 356/509] NTB: ntb_tool: Add check for devm_kcalloc [ Upstream commit 2790143f09938776a3b4f69685b380bae8fd06c7 ] As the devm_kcalloc may return NULL pointer, it should be better to add check for the return value, as same as the others. Fixes: 7f46c8b3a552 ("NTB: ntb_tool: Add full multi-port NTB API support") Signed-off-by: Jiasheng Jiang Reviewed-by: Serge Semin Reviewed-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/test/ntb_tool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 5ee0afa621a9..eeeb4b1c97d2 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -998,6 +998,8 @@ static int tool_init_mws(struct tool_ctx *tc) tc->peers[pidx].outmws = devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt, sizeof(*tc->peers[pidx].outmws), GFP_KERNEL); + if (tc->peers[pidx].outmws == NULL) + return -ENOMEM; for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) { tc->peers[pidx].outmws[widx].pidx = pidx; From 2ad31ce40e8182860b631e37209e93e543790b7c Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 8 Jul 2023 14:59:10 +0800 Subject: [PATCH 357/509] ipv6/addrconf: fix a potential refcount underflow for idev [ Upstream commit 06a0716949c22e2aefb648526580671197151acc ] Now in addrconf_mod_rs_timer(), reference idev depends on whether rs_timer is not pending. Then modify rs_timer timeout. There is a time gap in [1], during which if the pending rs_timer becomes not pending. It will miss to hold idev, but the rs_timer is activated. Thus rs_timer callback function addrconf_rs_timer() will be executed and put idev later without holding idev. A refcount underflow issue for idev can be caused by this. if (!timer_pending(&idev->rs_timer)) in6_dev_hold(idev); <--------------[1] mod_timer(&idev->rs_timer, jiffies + when); To fix the issue, hold idev if mod_timer() return 0. Fixes: b7b1bfce0bb6 ("ipv6: split duplicate address detection and router solicitation timer") Suggested-by: Eric Dumazet Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ed1e5bfc97b3..d5d10496b4ae 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -314,9 +314,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) static void addrconf_mod_rs_timer(struct inet6_dev *idev, unsigned long when) { - if (!timer_pending(&idev->rs_timer)) + if (!mod_timer(&idev->rs_timer, jiffies + when)) in6_dev_hold(idev); - mod_timer(&idev->rs_timer, jiffies + when); } static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, From 88dfb592d2c15bcb5cb37f40e3f2cbe61260b7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Sat, 4 Sep 2021 17:55:16 +0000 Subject: [PATCH 358/509] platform/x86: wmi: remove unnecessary argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 84eacf7e6413d5e2d2f4f9dddf9216c18a3631cf ] The GUID block is available for `wmi_create_device()` through `wblock->gblock`. Use that consistently in the function instead of using a mix of `gblock` and `wblock->gblock`. Signed-off-by: BarnabĆ”s Pőcze Link: https://lore.kernel.org/r/20210904175450.156801-8-pobrn@protonmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Stable-dep-of: 028e6e204ace ("platform/x86: wmi: Break possible infinite loop when parsing GUID") Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 1f80b2628162..9a6dc2717e1d 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -1042,7 +1042,6 @@ static const struct device_type wmi_type_data = { }; static int wmi_create_device(struct device *wmi_bus_dev, - const struct guid_block *gblock, struct wmi_block *wblock, struct acpi_device *device) { @@ -1050,12 +1049,12 @@ static int wmi_create_device(struct device *wmi_bus_dev, char method[5]; int result; - if (gblock->flags & ACPI_WMI_EVENT) { + if (wblock->gblock.flags & ACPI_WMI_EVENT) { wblock->dev.dev.type = &wmi_type_event; goto out_init; } - if (gblock->flags & ACPI_WMI_METHOD) { + if (wblock->gblock.flags & ACPI_WMI_METHOD) { wblock->dev.dev.type = &wmi_type_method; mutex_init(&wblock->char_mutex); goto out_init; @@ -1105,7 +1104,7 @@ static int wmi_create_device(struct device *wmi_bus_dev, wblock->dev.dev.bus = &wmi_bus_type; wblock->dev.dev.parent = wmi_bus_dev; - dev_set_name(&wblock->dev.dev, "%pUL", gblock->guid); + dev_set_name(&wblock->dev.dev, "%pUL", wblock->gblock.guid); device_initialize(&wblock->dev.dev); @@ -1197,7 +1196,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device) wblock->acpi_device = device; wblock->gblock = gblock[i]; - retval = wmi_create_device(wmi_bus_dev, &gblock[i], wblock, device); + retval = wmi_create_device(wmi_bus_dev, wblock, device); if (retval) { kfree(wblock); continue; From 4bb2bb69bd9a4f806dc12a0633e76c1904790117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Sat, 4 Sep 2021 17:55:39 +0000 Subject: [PATCH 359/509] platform/x86: wmi: use guid_t and guid_equal() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 67f472fdacf4a691b1c3c20c27800b23ce31e2de ] Instead of hard-coding a 16 long byte array, use the available `guid_t` type and related methods. Signed-off-by: BarnabĆ”s Pőcze Link: https://lore.kernel.org/r/20210904175450.156801-15-pobrn@protonmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Stable-dep-of: 028e6e204ace ("platform/x86: wmi: Break possible infinite loop when parsing GUID") Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 9a6dc2717e1d..18c4080d4a71 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -40,7 +40,7 @@ MODULE_LICENSE("GPL"); static LIST_HEAD(wmi_block_list); struct guid_block { - char guid[16]; + guid_t guid; union { char object_id[2]; struct { @@ -121,7 +121,7 @@ static bool find_guid(const char *guid_string, struct wmi_block **out) list_for_each_entry(wblock, &wmi_block_list, list) { block = &wblock->gblock; - if (memcmp(block->guid, &guid_input, 16) == 0) { + if (guid_equal(&block->guid, &guid_input)) { if (out) *out = wblock; return true; @@ -145,7 +145,7 @@ static const void *find_guid_context(struct wmi_block *wblock, while (*id->guid_string) { if (guid_parse(id->guid_string, &guid_input)) continue; - if (!memcmp(wblock->gblock.guid, &guid_input, 16)) + if (guid_equal(&wblock->gblock.guid, &guid_input)) return id->context; id++; } @@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block); static void wmi_dump_wdg(const struct guid_block *g) { - pr_info("%pUL:\n", g->guid); + pr_info("%pUL:\n", &g->guid); if (g->flags & ACPI_WMI_EVENT) pr_info("\tnotify_id: 0x%02X\n", g->notify_id); else @@ -539,7 +539,7 @@ wmi_notify_handler handler, void *data) list_for_each_entry(block, &wmi_block_list, list) { acpi_status wmi_status; - if (memcmp(block->gblock.guid, &guid_input, 16) == 0) { + if (guid_equal(&block->gblock.guid, &guid_input)) { if (block->handler && block->handler != wmi_notify_debug) return AE_ALREADY_ACQUIRED; @@ -579,7 +579,7 @@ acpi_status wmi_remove_notify_handler(const char *guid) list_for_each_entry(block, &wmi_block_list, list) { acpi_status wmi_status; - if (memcmp(block->gblock.guid, &guid_input, 16) == 0) { + if (guid_equal(&block->gblock.guid, &guid_input)) { if (!block->handler || block->handler == wmi_notify_debug) return AE_NULL_ENTRY; @@ -685,7 +685,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, { struct wmi_block *wblock = dev_to_wblock(dev); - return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid); + return sprintf(buf, "wmi:%pUL\n", &wblock->gblock.guid); } static DEVICE_ATTR_RO(modalias); @@ -694,7 +694,7 @@ static ssize_t guid_show(struct device *dev, struct device_attribute *attr, { struct wmi_block *wblock = dev_to_wblock(dev); - return sprintf(buf, "%pUL\n", wblock->gblock.guid); + return sprintf(buf, "%pUL\n", &wblock->gblock.guid); } static DEVICE_ATTR_RO(guid); @@ -777,10 +777,10 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) { struct wmi_block *wblock = dev_to_wblock(dev); - if (add_uevent_var(env, "MODALIAS=wmi:%pUL", wblock->gblock.guid)) + if (add_uevent_var(env, "MODALIAS=wmi:%pUL", &wblock->gblock.guid)) return -ENOMEM; - if (add_uevent_var(env, "WMI_GUID=%pUL", wblock->gblock.guid)) + if (add_uevent_var(env, "WMI_GUID=%pUL", &wblock->gblock.guid)) return -ENOMEM; return 0; @@ -808,7 +808,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) if (WARN_ON(guid_parse(id->guid_string, &driver_guid))) continue; - if (!memcmp(&driver_guid, wblock->gblock.guid, 16)) + if (guid_equal(&driver_guid, &wblock->gblock.guid)) return 1; id++; @@ -1104,7 +1104,7 @@ static int wmi_create_device(struct device *wmi_bus_dev, wblock->dev.dev.bus = &wmi_bus_type; wblock->dev.dev.parent = wmi_bus_dev; - dev_set_name(&wblock->dev.dev, "%pUL", wblock->gblock.guid); + dev_set_name(&wblock->dev.dev, "%pUL", &wblock->gblock.guid); device_initialize(&wblock->dev.dev); @@ -1124,12 +1124,12 @@ static void wmi_free_devices(struct acpi_device *device) } } -static bool guid_already_parsed(struct acpi_device *device, const u8 *guid) +static bool guid_already_parsed(struct acpi_device *device, const guid_t *guid) { struct wmi_block *wblock; list_for_each_entry(wblock, &wmi_block_list, list) { - if (memcmp(wblock->gblock.guid, guid, 16) == 0) { + if (guid_equal(&wblock->gblock.guid, guid)) { /* * Because we historically didn't track the relationship * between GUIDs and ACPI nodes, we don't know whether @@ -1184,7 +1184,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device) * case yet, so for now, we'll just ignore the duplicate * for device creation. */ - if (guid_already_parsed(device, gblock[i].guid)) + if (guid_already_parsed(device, &gblock[i].guid)) continue; wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL); @@ -1221,7 +1221,7 @@ static int parse_wdg(struct device *wmi_bus_dev, struct acpi_device *device) retval = device_add(&wblock->dev.dev); if (retval) { dev_err(wmi_bus_dev, "failed to register %pUL\n", - wblock->gblock.guid); + &wblock->gblock.guid); if (debug_event) wmi_method_enable(wblock, 0); list_del(&wblock->list); @@ -1335,7 +1335,7 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event, } if (debug_event) - pr_info("DEBUG Event GUID: %pUL\n", wblock->gblock.guid); + pr_info("DEBUG Event GUID: %pUL\n", &wblock->gblock.guid); acpi_bus_generate_netlink_event( wblock->acpi_device->pnp.device_class, From 7157ee0de522b588f08a61f3432bec5b88ccb1f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Sat, 4 Sep 2021 17:56:10 +0000 Subject: [PATCH 360/509] platform/x86: wmi: move variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f5431bf1e6781e876bdc8ae10fb1e7da6f1aa9b5 ] Move some variables in order to keep them in the narrowest possible scope. Signed-off-by: BarnabĆ”s Pőcze Link: https://lore.kernel.org/r/20210904175450.156801-22-pobrn@protonmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Stable-dep-of: 028e6e204ace ("platform/x86: wmi: Break possible infinite loop when parsing GUID") Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 18c4080d4a71..5e4c03f7db7c 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -134,7 +134,6 @@ static const void *find_guid_context(struct wmi_block *wblock, struct wmi_driver *wdriver) { const struct wmi_device_id *id; - guid_t guid_input; if (wblock == NULL || wdriver == NULL) return NULL; @@ -143,6 +142,8 @@ static const void *find_guid_context(struct wmi_block *wblock, id = wdriver->id_table; while (*id->guid_string) { + guid_t guid_input; + if (guid_parse(id->guid_string, &guid_input)) continue; if (guid_equal(&wblock->gblock.guid, &guid_input)) @@ -615,7 +616,6 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) { struct acpi_object_list input; union acpi_object params[1]; - struct guid_block *gblock; struct wmi_block *wblock; input.count = 1; @@ -624,7 +624,7 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out) params[0].integer.value = event; list_for_each_entry(wblock, &wmi_block_list, list) { - gblock = &wblock->gblock; + struct guid_block *gblock = &wblock->gblock; if ((gblock->flags & ACPI_WMI_EVENT) && (gblock->notify_id == event)) @@ -1281,12 +1281,11 @@ acpi_wmi_ec_space_handler(u32 function, acpi_physical_address address, static void acpi_wmi_notify_handler(acpi_handle handle, u32 event, void *context) { - struct guid_block *block; struct wmi_block *wblock; bool found_it = false; list_for_each_entry(wblock, &wmi_block_list, list) { - block = &wblock->gblock; + struct guid_block *block = &wblock->gblock; if (wblock->acpi_device->handle == handle && (block->flags & ACPI_WMI_EVENT) && From cdf5b9af92dae93311098178cfa9dc42f96e7c0a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Jun 2023 18:11:54 +0300 Subject: [PATCH 361/509] platform/x86: wmi: Break possible infinite loop when parsing GUID [ Upstream commit 028e6e204ace1f080cfeacd72c50397eb8ae8883 ] The while-loop may break on one of the two conditions, either ID string is empty or GUID matches. The second one, may never be reached if the parsed string is not correct GUID. In such a case the loop will never advance to check the next ID. Break possible infinite loop by factoring out guid_parse_and_compare() helper which may be moved to the generic header for everyone later on and preventing from similar mistake in the future. Interestingly that firstly it appeared when WMI was turned into a bus driver, but later when duplicated GUIDs were checked, the while-loop has been replaced by for-loop and hence no mistake made again. Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id") Fixes: 844af950da94 ("platform/x86: wmi: Turn WMI into a bus driver") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230621151155.78279-1-andriy.shevchenko@linux.intel.com Tested-by: Armin Wolf Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 5e4c03f7db7c..567c28705cb1 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -130,6 +130,16 @@ static bool find_guid(const char *guid_string, struct wmi_block **out) return false; } +static bool guid_parse_and_compare(const char *string, const guid_t *guid) +{ + guid_t guid_input; + + if (guid_parse(string, &guid_input)) + return false; + + return guid_equal(&guid_input, guid); +} + static const void *find_guid_context(struct wmi_block *wblock, struct wmi_driver *wdriver) { @@ -142,11 +152,7 @@ static const void *find_guid_context(struct wmi_block *wblock, id = wdriver->id_table; while (*id->guid_string) { - guid_t guid_input; - - if (guid_parse(id->guid_string, &guid_input)) - continue; - if (guid_equal(&wblock->gblock.guid, &guid_input)) + if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) return id->context; id++; } @@ -804,11 +810,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) return 0; while (*id->guid_string) { - guid_t driver_guid; - - if (WARN_ON(guid_parse(id->guid_string, &driver_guid))) - continue; - if (guid_equal(&driver_guid, &wblock->gblock.guid)) + if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) return 1; id++; From c48e8ee81ad35ceace0e971b37948887d4411da8 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:13 +0200 Subject: [PATCH 362/509] igc: Fix launchtime before start of cycle [ Upstream commit c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a ] It is possible (verified on a running system) that frames are processed by igc_tx_launchtime with a txtime before the start of the cycle (baset_est). However, the result of txtime - baset_est is written into a u32, leading to a wrap around to a positive number. The following launchtime > 0 check will only branch to executing launchtime = 0 if launchtime is already 0. Fix it by using a s32 before checking launchtime > 0. Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 2b51ee87a2de..051b1048eb41 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -905,7 +905,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); From 83579a626169b35553945ff48aa5936b5db06ed2 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:14 +0200 Subject: [PATCH 363/509] igc: Fix inserting of empty frame for launchtime [ Upstream commit 0bcc62858d6ba62cbade957d69745e6adeed5f3d ] The insertion of an empty frame was introduced with commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") in order to ensure that the current cycle has at least one packet if there is some packet to be scheduled for the next cycle. However, the current implementation does not properly check if a packet is already scheduled for the current cycle. Currently, an empty packet is always inserted if and only if txtime >= end_of_cycle && txtime > last_tx_cycle but since last_tx_cycle is always either the end of the current cycle (end_of_cycle) or the end of a previous cycle, the second part (txtime > last_tx_cycle) is always true unless txtime == last_tx_cycle. What actually needs to be checked here is if the last_tx_cycle was already written within the current cycle, so an empty frame should only be inserted if and only if txtime >= end_of_cycle && end_of_cycle > last_tx_cycle. This patch does not only avoid an unnecessary insertion, but it can actually be harmful to insert an empty packet if packets are already scheduled in the current cycle, because it can lead to a situation where the empty packet is actually processed as the first packet in the upcoming cycle shifting the packet with the first_flag even one cycle into the future, finally leading to a TX hang. The TX hang can be reproduced on a i225 with: sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x1 \ txtime-delay 500000 \ clockid CLOCK_TAI sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload \ skip_sock_check and traffic generator sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns with traffic.cfg #define ETH_P_IP 0x0800 { /* Ethernet Header */ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 48, 1, # IP Src - adapt as needed 10, 0, 48, 10, # IP Dest - adapt as needed const16(5555), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } and the observed message with that is for example igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang Tx Queue <0> TDH <32> TDT <3c> next_to_use <3c> next_to_clean <32> buffer_info[next_to_clean] time_stamp next_to_watch <00000000632a1828> jiffies desc.status <1048000> Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 051b1048eb41..631ce793fb2e 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -918,7 +918,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } From 7c616437981f40a1c93fb28e6cb17ea25d87be2b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:24:54 +0800 Subject: [PATCH 364/509] riscv: bpf: Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to core [ Upstream commit 1d27d854425faec98f352cf88ec3e2a8844429a4 ] We will drop the executable permissions of the code pages from the mapping at allocation time soon. Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to bpf_jit_core.c so that they can be shared by both RV64I and RV32I. Signed-off-by: Jisheng Zhang Acked-by: Luke Nelson Signed-off-by: Palmer Dabbelt Stable-dep-of: c56fb2aab235 ("riscv, bpf: Fix inconsistent JIT image generation") Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit_comp64.c | 13 ------------- arch/riscv/net/bpf_jit_core.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c113ae818b14..053dc83e323b 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1144,16 +1144,3 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } - -void *bpf_jit_alloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index cbf7d2414886..e295c9eed9e9 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -165,3 +165,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) tmp : orig_prog); return prog; } + +void *bpf_jit_alloc_exec(unsigned long size) +{ + return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, + BPF_JIT_REGION_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} + +void bpf_jit_free_exec(void *addr) +{ + return vfree(addr); +} From eb3d1d84f3d68c293579495728d2fef8be35c11a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:25:21 +0800 Subject: [PATCH 365/509] riscv: bpf: Avoid breaking W^X [ Upstream commit fc8504765ec5e812135b8ccafca7101069a0c6d8 ] We allocate Non-executable pages, then call bpf_jit_binary_lock_ro() to enable executable permission after mapping them read-only. This is to prepare for STRICT_MODULE_RWX in following patch. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt Stable-dep-of: c56fb2aab235 ("riscv, bpf: Fix inconsistent JIT image generation") Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index e295c9eed9e9..5d247198c30d 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -153,6 +153,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(jit_data->header); out_offset: kfree(ctx->offset); kfree(jit_data); @@ -170,7 +171,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } From a976adc3bca46e9f60345e9387d59463c58b8c7d Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 30 May 2022 17:28:11 +0800 Subject: [PATCH 366/509] bpf, riscv: Support riscv jit to provide bpf_line_info [ Upstream commit 3cb70413041fdf028fa1ba3986fd0c6aec9e3dcb ] Add support for riscv jit to provide bpf_line_info. We need to consider the prologue offset in ctx->offset, but unlike x86 and arm64, ctx->offset of riscv does not provide an extra slot for the prologue, so here we just calculate the len of prologue and add it to ctx->offset at the end. Both RV64 and RV32 have been tested. Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220530092815.1112406-3-pulehui@huawei.com Stable-dep-of: c56fb2aab235 ("riscv, bpf: Fix inconsistent JIT image generation") Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit.h | 1 + arch/riscv/net/bpf_jit_core.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 75c1e9996867..ab0cd6d10ccf 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -69,6 +69,7 @@ struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ int ninsns; + int body_len; int epilogue_offset; int *offset; /* BPF to RV */ unsigned long flags; diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 5d247198c30d..750b15c319d5 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -43,7 +43,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, i; + int pass = 0, prev_ninsns = 0, prologue_len, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; unsigned int image_size = 0; @@ -95,6 +95,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + ctx->body_len = ctx->ninsns; bpf_jit_build_prologue(ctx); ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -154,6 +155,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(jit_data->header); + prologue_len = ctx->epilogue_offset - ctx->body_len; + for (i = 0; i < prog->len; i++) + ctx->offset[i] = ninsns_rvoff(prologue_len + + ctx->offset[i]); + bpf_prog_fill_jited_linfo(prog, ctx->offset); out_offset: kfree(ctx->offset); kfree(jit_data); From bbc500ff3f2c2c3c3fd1ff2953e1965874cc9418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 10 Jul 2023 09:41:31 +0200 Subject: [PATCH 367/509] riscv, bpf: Fix inconsistent JIT image generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c56fb2aab23505bb7160d06097c8de100b82b851 ] In order to generate the prologue and epilogue, the BPF JIT needs to know which registers that are clobbered. Therefore, the during pre-final passes, the prologue is generated after the body of the program body-prologue-epilogue. Then, in the final pass, a proper prologue-body-epilogue JITted image is generated. This scheme has worked most of the time. However, for some large programs with many jumps, e.g. the test_kmod.sh BPF selftest with hardening enabled (blinding constants), this has shown to be incorrect. For the final pass, when the proper prologue-body-epilogue is generated, the image has not converged. This will lead to that the final image will have incorrect jump offsets. The following is an excerpt from an incorrect image: | ... | 3b8: 00c50663 beq a0,a2,3c4 <.text+0x3c4> | 3bc: 0020e317 auipc t1,0x20e | 3c0: 49630067 jalr zero,1174(t1) # 20e852 <.text+0x20e852> | ... | 20e84c: 8796 c.mv a5,t0 | 20e84e: 6422 c.ldsp s0,8(sp) # Epilogue start | 20e850: 6141 c.addi16sp sp,16 | 20e852: 853e c.mv a0,a5 # Incorrect jump target | 20e854: 8082 c.jr ra The image has shrunk, and the epilogue offset is incorrect in the final pass. Correct the problem by always generating proper prologue-body-epilogue outputs, which means that the first pass will only generate the body to track what registers that are touched. Fixes: 2353ecc6f91f ("bpf, riscv: add BPF JIT for RV64G") Signed-off-by: Bjƶrn Tƶpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230710074131.19596-1-bjorn@kernel.org Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit.h | 6 +++--- arch/riscv/net/bpf_jit_core.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index ab0cd6d10ccf..ef336fe16004 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -69,7 +69,7 @@ struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ int ninsns; - int body_len; + int prologue_len; int epilogue_offset; int *offset; /* BPF to RV */ unsigned long flags; @@ -215,8 +215,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) int from, to; off++; /* BPF branch is from PC+1, RV is from PC */ - from = (insn > 0) ? ctx->offset[insn - 1] : 0; - to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len; return ninsns_rvoff(to - from); } diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 750b15c319d5..ef17bc8055d4 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -43,7 +43,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, prologue_len, i; + int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; unsigned int image_size = 0; @@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + + if (build_body(ctx, extra_pass, NULL)) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { prev_ninsns += 32; ctx->offset[i] = prev_ninsns; @@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) for (i = 0; i < NR_JIT_ITERATIONS; i++) { pass++; ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + ctx->prologue_len = ctx->ninsns; + if (build_body(ctx, extra_pass, ctx->offset)) { prog = orig_prog; goto out_offset; } - ctx->body_len = ctx->ninsns; - bpf_jit_build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -155,10 +164,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(jit_data->header); - prologue_len = ctx->epilogue_offset - ctx->body_len; for (i = 0; i < prog->len; i++) - ctx->offset[i] = ninsns_rvoff(prologue_len + - ctx->offset[i]); + ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); bpf_prog_fill_jited_linfo(prog, ctx->offset); out_offset: kfree(ctx->offset); From 4511499138aecce6b6f93191e8a6605650ebc6ba Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Mon, 10 Jul 2023 17:34:10 +0800 Subject: [PATCH 368/509] erofs: avoid infinite loop in z_erofs_do_read_page() when reading beyond EOF [ Upstream commit 8191213a5835b0317c5e4d0d337ae1ae00c75253 ] z_erofs_do_read_page() may loop infinitely due to the inappropriate truncation in the below statement. Since the offset is 64 bits and min_t() truncates the result to 32 bits. The solution is to replace unsigned int with a 64-bit type, such as erofs_off_t. cur = end - min_t(unsigned int, offset + end - map->m_la, end); - For example: - offset = 0x400160000 - end = 0x370 - map->m_la = 0x160370 - offset + end - map->m_la = 0x400000000 - offset + end - map->m_la = 0x00000000 (truncated as unsigned int) - Expected result: - cur = 0 - Actual result: - cur = 0x370 Signed-off-by: Chunhai Guo Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20230710093410.44071-1-guochunhai@vivo.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/zdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 8cb2cf612e49..9cff92738259 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -629,7 +629,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED && clt->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE); - cur = end - min_t(unsigned int, offset + end - map->m_la, end); + cur = end - min_t(erofs_off_t, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); goto next_part; From 934c85b8ecd15c2c1eb9b4769fb790e35737545a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Jul 2023 06:31:54 -0700 Subject: [PATCH 369/509] wifi: airo: avoid uninitialized warning in airo_get_rate() [ Upstream commit 9373771aaed17f5c2c38485f785568abe3a9f8c1 ] Quieten a gcc (11.3.0) build error or warning by checking the function call status and returning -EBUSY if the function call failed. This is similar to what several other wireless drivers do for the SIOCGIWRATE ioctl call when there is a locking problem. drivers/net/wireless/cisco/airo.c: error: 'status_rid.currentXmitRate' is used uninitialized [-Werror=uninitialized] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/39abf2c7-24a-f167-91da-ed4c5435d1c4@linux-m68k.org Link: https://lore.kernel.org/r/20230709133154.26206-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 8c9c6bfbaeee..aa1d12f6f5c3 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -6150,8 +6150,11 @@ static int airo_get_rate(struct net_device *dev, { struct airo_info *local = dev->ml_priv; StatusRid status_rid; /* Card status info */ + int ret; - readStatusRid(local, &status_rid, 1); + ret = readStatusRid(local, &status_rid, 1); + if (ret) + return -EBUSY; vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000; /* If more than one rate, set auto */ From 9b69cdb6e534b5d3c461ceb4c895bc805883ecb7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jul 2023 10:08:09 +0300 Subject: [PATCH 370/509] net/sched: flower: Ensure both minimum and maximum ports are specified [ Upstream commit d3f87278bcb80bd7f9519669d928b43320363d4f ] The kernel does not currently validate that both the minimum and maximum ports of a port range are specified. This can lead user space to think that a filter matching on a port range was successfully added, when in fact it was not. For example, with a patched (buggy) iproute2 that only sends the minimum port, the following commands do not return an error: # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass # tc filter show dev swp1 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 ip_proto udp not_in_hw action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 filter protocol ip pref 1 flower chain 0 handle 0x2 eth_type ipv4 ip_proto udp not_in_hw action order 1: gact action pass random type none pass val 0 index 2 ref 1 bind 1 Fix by returning an error unless both ports are specified: # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass Error: Both min and max source ports must be specified. We have an error talking to the kernel # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass Error: Both min and max destination ports must be specified. We have an error talking to the kernel Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/cls_flower.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index caf1a05bfbde..dcf21d99f132 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -778,6 +778,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); + if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) { + NL_SET_ERR_MSG(extack, + "Both min and max destination ports must be specified"); + return -EINVAL; + } + if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) { + NL_SET_ERR_MSG(extack, + "Both min and max source ports must be specified"); + return -EINVAL; + } if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && ntohs(key->tp_range.tp_max.dst) <= ntohs(key->tp_range.tp_min.dst)) { From d5ca61b7642b74d068a0beef585b6d812541d482 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jul 2023 11:52:26 +0300 Subject: [PATCH 371/509] netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write() [ Upstream commit f72207a5c0dbaaf6921cf9a6c0d2fd0bc249ea78 ] The simple_write_to_buffer() function is designed to handle partial writes. It returns negatives on error, otherwise it returns the number of bytes that were able to be copied. This code doesn't check the return properly. We only know that the first byte is written, the rest of the buffer might be uninitialized. There is no need to use the simple_write_to_buffer() function. Partial writes are prohibited by the "if (*ppos != 0)" check at the start of the function. Just use memdup_user() and copy the whole buffer. Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Dan Carpenter Reviewed-by: Pavan Chebbi Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/7c1f950b-3a7d-4252-82a6-876e53078ef7@moroto.mountain Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netdevsim/dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 9bbecf4d159b..bcf354719745 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -149,13 +149,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file, cookie_len = (count - 1) / 2; if ((count - 1) % 2) return -EINVAL; - buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); - if (!buf) - return -ENOMEM; - ret = simple_write_to_buffer(buf, count, ppos, data, count); - if (ret < 0) - goto free_buf; + buf = memdup_user(data, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len, GFP_KERNEL | __GFP_NOWARN); From 1d7ae38daac74122aee4fead96fd745718bcacb6 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 10 Jul 2023 23:16:34 -0300 Subject: [PATCH 372/509] net/sched: make psched_mtu() RTNL-less safe [ Upstream commit 150e33e62c1fa4af5aaab02776b6c3812711d478 ] Eric Dumazet says[1]: ------- Speaking of psched_mtu(), I see that net/sched/sch_pie.c is using it without holding RTNL, so dev->mtu can be changed underneath. KCSAN could issue a warning. ------- Annotate dev->mtu with READ_ONCE() so KCSAN don't issue a warning. [1] https://lore.kernel.org/all/CANn89iJoJO5VtaJ-2=_d2aOQhb0Xw8iBT_Cxqp2HyuS-zj6azw@mail.gmail.com/ v1 -> v2: Fix commit message Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") Suggested-by: Eric Dumazet Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230711021634.561598-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/pkt_sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index ba781e0aaf56..e186b2bd8c86 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -136,7 +136,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; */ static inline unsigned int psched_mtu(const struct net_device *dev) { - return dev->mtu + dev->hard_header_len; + return READ_ONCE(dev->mtu) + dev->hard_header_len; } static inline struct net *qdisc_net(struct Qdisc *q) From 5bef780e06d2948b9665e22e0fcdb2fba8dd4653 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Sat, 22 Apr 2023 12:56:11 -0300 Subject: [PATCH 373/509] net/sched: sch_qfq: refactor parsing of netlink parameters [ Upstream commit 25369891fcef373540f8b4e0b3bccf77a04490d5 ] Two parameters can be transformed into netlink policies and validated while parsing the netlink message. Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller Stable-dep-of: 3e337087c3b5 ("net/sched: sch_qfq: account for stab overhead in qfq_enqueue") Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index cad7deacf60a..975e444f2d82 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,6 +113,7 @@ #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ +#define QFQ_MAX_LMAX (1UL << QFQ_MTU_SHIFT) #define QFQ_MAX_AGG_CLASSES 8 /* max num classes per aggregate allowed */ @@ -214,9 +215,14 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } +static struct netlink_range_validation lmax_range = { + .min = QFQ_MIN_LMAX, + .max = QFQ_MAX_LMAX, +}; + static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { - [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, - [TCA_QFQ_LMAX] = { .type = NLA_U32 }, + [TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT), + [TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range), }; /* @@ -408,17 +414,13 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], - qfq_policy, NULL); + qfq_policy, extack); if (err < 0) return err; - if (tb[TCA_QFQ_WEIGHT]) { + if (tb[TCA_QFQ_WEIGHT]) weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); - if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { - pr_notice("qfq: invalid weight %u\n", weight); - return -EINVAL; - } - } else + else weight = 1; if (tb[TCA_QFQ_LMAX]) @@ -426,11 +428,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, else lmax = psched_mtu(qdisc_dev(sch)); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; From 8359ee85fd6dabc5c134ed69fb22faadd8a44071 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 11 Jul 2023 18:01:02 -0300 Subject: [PATCH 374/509] net/sched: sch_qfq: account for stab overhead in qfq_enqueue [ Upstream commit 3e337087c3b5805fe0b8a46ba622a962880b5d64 ] Lion says: ------- In the QFQ scheduler a similar issue to CVE-2023-31436 persists. Consider the following code in net/sched/sch_qfq.c: static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; // ... if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", cl->agg->lmax, len, cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } // ... } Similarly to CVE-2023-31436, "lmax" is increased without any bounds checks according to the packet length "len". Usually this would not impose a problem because packet sizes are naturally limited. This is however not the actual packet length, rather the "qdisc_pkt_len(skb)" which might apply size transformations according to "struct qdisc_size_table" as created by "qdisc_get_stab()" in net/sched/sch_api.c if the TCA_STAB option was set when modifying the qdisc. A user may choose virtually any size using such a table. As a result the same issue as in CVE-2023-31436 can occur, allowing heap out-of-bounds read / writes in the kmalloc-8192 cache. ------- We can create the issue with the following commands: tc qdisc add dev $DEV root handle 1: stab mtu 2048 tsize 512 mpu 0 \ overhead 999999999 linklayer ethernet qfq tc class add dev $DEV parent 1: classid 1:1 htb rate 6mbit burst 15k tc filter add dev $DEV parent 1: matchall classid 1:1 ping -I $DEV 1.1.1.2 This is caused by incorrectly assuming that qdisc_pkt_len() returns a length within the QFQ_MIN_LMAX < len < QFQ_MAX_LMAX. Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") Reported-by: Lion Reviewed-by: Eric Dumazet Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 975e444f2d82..616d1798cfef 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); + struct qfq_aggregate *new_agg; + /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ + if (lmax > QFQ_MAX_LMAX) + return -EINVAL; + + new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) From f4ff3798123585c9c7dd48ab7ef169fb505e0774 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 13 Jul 2023 17:26:20 +0800 Subject: [PATCH 375/509] nvme-pci: fix DMA direction of unmapping integrity data [ Upstream commit b8f6446b6853768cb99e7c201bddce69ca60c15e ] DMA direction should be taken in dma_unmap_page() for unmapping integrity data. Fix this DMA direction, and reported in Guangwu's test. Reported-by: Guangwu Zhang Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c47512da9872..3aaead9b3a57 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -968,7 +968,8 @@ static void nvme_pci_complete_rq(struct request *req) if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, - rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); + if (blk_rq_nr_phys_segments(req)) nvme_unmap_data(dev, req); nvme_complete_rq(req); From b39ef5b52f10b819bd0ceeb22e8f7df7800880ca Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 14:17:25 +0800 Subject: [PATCH 376/509] f2fs: fix to avoid NULL pointer dereference f2fs_write_end_io() commit d8189834d4348ae608083e1f1f53792cfcc2a9bc upstream. butt3rflyh4ck reports a bug as below: When a thread always calls F2FS_IOC_RESIZE_FS to resize fs, if resize fs is failed, f2fs kernel thread would invoke callback function to update f2fs io info, it would call f2fs_write_end_io and may trigger null-ptr-deref in NODE_MAPPING. general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] RIP: 0010:NODE_MAPPING fs/f2fs/f2fs.h:1972 [inline] RIP: 0010:f2fs_write_end_io+0x727/0x1050 fs/f2fs/data.c:370 bio_endio+0x5af/0x6c0 block/bio.c:1608 req_bio_endio block/blk-mq.c:761 [inline] blk_update_request+0x5cc/0x1690 block/blk-mq.c:906 blk_mq_end_request+0x59/0x4c0 block/blk-mq.c:1023 lo_complete_rq+0x1c6/0x280 drivers/block/loop.c:370 blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1101 __do_softirq+0x1d4/0x8ef kernel/softirq.c:571 run_ksoftirqd kernel/softirq.c:939 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:931 smpboot_thread_fn+0x659/0x9e0 kernel/smpboot.c:164 kthread+0x33e/0x440 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 The root cause is below race case can cause leaving dirty metadata in f2fs after filesystem is remount as ro: Thread A Thread B - f2fs_ioc_resize_fs - f2fs_readonly --- return false - f2fs_resize_fs - f2fs_remount - write_checkpoint - set f2fs as ro - free_segment_range - update meta_inode's data Then, if f2fs_put_super() fails to write_checkpoint due to readonly status, and meta_inode's dirty data will be writebacked after node_inode is put, finally, f2fs_write_end_io will access NULL pointer on sbi->node_inode. Thread A IRQ context - f2fs_put_super - write_checkpoint fails - iput(node_inode) - node_inode = NULL - iput(meta_inode) - write_inode_now - f2fs_write_meta_page - f2fs_write_end_io - NODE_MAPPING(sbi) : access NULL pointer on node_inode Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Reported-by: butt3rflyh4ck Closes: https://lore.kernel.org/r/1684480657-2375-1-git-send-email-yangtiezhu@loongson.cn Tested-by: butt3rflyh4ck Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Stefan Ghinea Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 62b7848f1f71..83ebc860508b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3483,7 +3483,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); -int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); +int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a0d8aa52b696..6e91be5b8c30 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3356,7 +3356,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - return f2fs_resize_fs(sbi, block_count); + return f2fs_resize_fs(filp, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 66ac048cc899..8e4daee0171f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include @@ -1976,8 +1977,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) } } -int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +int f2fs_resize_fs(struct file *filp, __u64 block_count) { + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); __u64 old_block_count, shrunk_blocks; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; @@ -2015,12 +2017,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } + err = mnt_want_write_file(filp); + if (err) + return err; + shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!down_write_trylock(&sbi->gc_lock)) - return -EAGAIN; + if (!down_write_trylock(&sbi->gc_lock)) { + err = -EAGAIN; + goto out_drop_write; + } /* stop CP to protect MAIN_SEC in free_segment_range */ f2fs_lock_op(sbi); @@ -2040,10 +2048,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); up_write(&sbi->gc_lock); +out_drop_write: + mnt_drop_write_file(filp); if (err) return err; freeze_super(sbi->sb); + + if (f2fs_readonly(sbi->sb)) { + thaw_super(sbi->sb); + return -EROFS; + } + down_write(&sbi->gc_lock); mutex_lock(&sbi->cp_mutex); From 3674b9c056adb653f260c5665591b16b3ef71683 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 21 Apr 2023 07:06:22 -0500 Subject: [PATCH 377/509] pinctrl: amd: Fix mistake in handling clearing pins at startup commit a855724dc08b8cb0c13ab1e065a4922f1e5a7552 upstream. commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe") had a mistake in loop iteration 63 that it would clear offset 0xFC instead of 0x100. Offset 0xFC is actually `WAKE_INT_MASTER_REG`. This was clearing bits 13 and 15 from the register which significantly changed the expected handling for some platforms for GPIO0. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217315 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-3-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 82b658a3c220..6ddbffdb2b6c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -784,9 +784,9 @@ static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) raw_spin_lock_irqsave(&gpio_dev->lock, flags); - pin_reg = readl(gpio_dev->base + i * 4); + pin_reg = readl(gpio_dev->base + pin * 4); pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + i * 4); + writel(pin_reg, gpio_dev->base + pin * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } From 4f77a87ce9198b2fc738e7d6c7cd8c43372d0e32 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 21 Apr 2023 07:06:21 -0500 Subject: [PATCH 378/509] pinctrl: amd: Detect internal GPIO0 debounce handling commit 968ab9261627fa305307e3935ca1a32fcddd36cb upstream. commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe") had a mistake in loop iteration 63 that it would clear offset 0xFC instead of 0x100. Offset 0xFC is actually `WAKE_INT_MASTER_REG`. This was clearing bits 13 and 15 from the register which significantly changed the expected handling for some platforms for GPIO0. commit b26cd9325be4 ("pinctrl: amd: Disable and mask interrupts on resume") actually fixed this bug, but lead to regressions on Lenovo Z13 and some other systems. This is because there was no handling in the driver for bit 15 debounce behavior. Quoting a public BKDG: ``` EnWinBlueBtn. Read-write. Reset: 0. 0=GPIO0 detect debounced power button; Power button override is 4 seconds. 1=GPIO0 detect debounced power button in S3/S5/S0i3, and detect "pressed less than 2 seconds" and "pressed 2~10 seconds" in S0; Power button override is 10 seconds ``` Cross referencing the same master register in Windows it's obvious that Windows doesn't use debounce values in this configuration. So align the Linux driver to do this as well. This fixes wake on lid when WAKE_INT_MASTER_REG is properly programmed. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217315 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 7 +++++++ drivers/pinctrl/pinctrl-amd.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 6ddbffdb2b6c..c983d3306607 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -126,6 +126,12 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, struct amd_gpio *gpio_dev = gpiochip_get_data(gc); raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + /* Use special handling for Pin0 debounce */ + pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); + if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) + debounce = 0; + pin_reg = readl(gpio_dev->base + offset * 4); if (debounce) { @@ -215,6 +221,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *output_value; char *output_enable; + seq_printf(s, "WAKE_INT_MASTER_REG: 0x%08x\n", readl(gpio_dev->base + WAKE_INT_MASTER_REG)); for (bank = 0; bank < gpio_dev->hwbank_num; bank++) { seq_printf(s, "GPIO bank%d\t", bank); diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index 95e763424042..9f95ec9e2201 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -17,6 +17,7 @@ #define AMD_GPIO_PINS_BANK3 32 #define WAKE_INT_MASTER_REG 0xfc +#define INTERNAL_GPIO0_DEBOUNCE (1 << 15) #define EOI_MASK (1 << 29) #define WAKE_INT_STATUS_REG0 0x2f8 From 59490249c2c09f7d5b9440a1428ae9c66a1e142f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:02 -0500 Subject: [PATCH 379/509] pinctrl: amd: Only use special debounce behavior for GPIO 0 commit 0d5ace1a07f7e846d0f6d972af60d05515599d0b upstream. It's uncommon to use debounce on any other pin, but technically we should only set debounce to 0 when working off GPIO0. Cc: stable@vger.kernel.org Tested-by: Jan Visser Fixes: 968ab9261627 ("pinctrl: amd: Detect internal GPIO0 debounce handling") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230705133005.577-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index c983d3306607..0d71151575ee 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -128,9 +128,11 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, raw_spin_lock_irqsave(&gpio_dev->lock, flags); /* Use special handling for Pin0 debounce */ - pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); - if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) - debounce = 0; + if (offset == 0) { + pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); + if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) + debounce = 0; + } pin_reg = readl(gpio_dev->base + offset * 4); From 9ff7fcb3a2ed0e9b895bb5b4c13872d584a8815b Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 16 May 2023 01:25:54 +0300 Subject: [PATCH 380/509] tpm: tpm_vtpm_proxy: fix a race condition in /dev/vtpmx creation commit f4032d615f90970d6c3ac1d9c0bce3351eb4445c upstream. /dev/vtpmx is made visible before 'workqueue' is initialized, which can lead to a memory corruption in the worst case scenario. Address this by initializing 'workqueue' as the very first step of the driver initialization. Cc: stable@vger.kernel.org Fixes: 6f99612e2500 ("tpm: Proxy driver for supporting multiple emulated TPMs") Reviewed-by: Stefan Berger Signed-off-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_vtpm_proxy.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c index 91c772e38bb5..ff2ec71d592e 100644 --- a/drivers/char/tpm/tpm_vtpm_proxy.c +++ b/drivers/char/tpm/tpm_vtpm_proxy.c @@ -683,37 +683,21 @@ static struct miscdevice vtpmx_miscdev = { .fops = &vtpmx_fops, }; -static int vtpmx_init(void) -{ - return misc_register(&vtpmx_miscdev); -} - -static void vtpmx_cleanup(void) -{ - misc_deregister(&vtpmx_miscdev); -} - static int __init vtpm_module_init(void) { int rc; - rc = vtpmx_init(); - if (rc) { - pr_err("couldn't create vtpmx device\n"); - return rc; - } - workqueue = create_workqueue("tpm-vtpm"); if (!workqueue) { pr_err("couldn't create workqueue\n"); - rc = -ENOMEM; - goto err_vtpmx_cleanup; + return -ENOMEM; } - return 0; - -err_vtpmx_cleanup: - vtpmx_cleanup(); + rc = misc_register(&vtpmx_miscdev); + if (rc) { + pr_err("couldn't create vtpmx device\n"); + destroy_workqueue(workqueue); + } return rc; } @@ -721,7 +705,7 @@ static int __init vtpm_module_init(void) static void __exit vtpm_module_exit(void) { destroy_workqueue(workqueue); - vtpmx_cleanup(); + misc_deregister(&vtpmx_miscdev); } module_init(vtpm_module_init); From 21d5d3eb36bf843d208742a3d942e8b86c202400 Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 15 Jun 2023 11:08:15 +0300 Subject: [PATCH 381/509] mtd: rawnand: meson: fix unaligned DMA buffers handling commit 98480a181a08ceeede417e5b28f6d0429d8ae156 upstream. Meson NAND controller requires 8 bytes alignment for DMA addresses, otherwise it "aligns" passed address by itself thus accessing invalid location in the provided buffer. This patch makes unaligned buffers to be reallocated to become valid. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Cc: Signed-off-by: Arseniy Krasnov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230615080815.3291006-1-AVKrasnov@sberdevices.ru Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/meson_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 228612d82f31..ee3976b7e197 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -72,6 +72,7 @@ #define GENCMDIADDRH(aih, addr) ((aih) | (((addr) >> 16) & 0xffff)) #define DMA_DIR(dir) ((dir) ? NFC_CMD_N2M : NFC_CMD_M2N) +#define DMA_ADDR_ALIGN 8 #define ECC_CHECK_RETURN_FF (-1) @@ -838,6 +839,9 @@ static int meson_nfc_read_oob(struct nand_chip *nand, int page) static bool meson_nfc_is_buffer_dma_safe(const void *buffer) { + if ((uintptr_t)buffer % DMA_ADDR_ALIGN) + return false; + if (virt_addr_valid(buffer) && (!object_is_on_stack(buffer))) return true; return false; From fe1ae1fb507a2e14578fe0fb24209cef8a02b436 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 Jun 2023 03:31:07 -0700 Subject: [PATCH 382/509] net: bcmgenet: Ensure MDIO unregistration has clocks enabled commit 1b5ea7ffb7a3bdfffb4b7f40ce0d20a3372ee405 upstream. With support for Ethernet PHY LEDs having been added, while unregistering a MDIO bus and its child device liks PHYs there may be "late" accesses to the MDIO bus. One typical use case is setting the PHY LEDs brightness to OFF for instance. We need to ensure that the MDIO bus controller remains entirely functional since it runs off the main GENET adapter clock. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230617155500.4005881-1-andrew@lunn.ch/ Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230622103107.1760280-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 4b875838a646..99aba64f03c2 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -624,5 +624,7 @@ void bcmgenet_mii_exit(struct net_device *dev) if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); + clk_prepare_enable(priv->clk); platform_device_unregister(priv->mii_pdev); + clk_disable_unprepare(priv->clk); } From 3d1d037f274940201b95d67dfc223b184cc009ce Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Tue, 30 May 2023 11:44:36 +0530 Subject: [PATCH 383/509] powerpc: Fail build if using recordmcount with binutils v2.37 commit 25ea739ea1d4d3de41acc4f4eb2d1a97eee0eb75 upstream. binutils v2.37 drops unused section symbols, which prevents recordmcount from capturing mcount locations in sections that have no non-weak symbols. This results in a build failure with a message such as: Cannot find symbol for section 12: .text.perf_callchain_kernel. kernel/events/callchain.o: failed The change to binutils was reverted for v2.38, so this behavior is specific to binutils v2.37: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=c09c8b42021180eee9495bd50d8b35e683d3901b Objtool is able to cope with such sections, so this issue is specific to recordmcount. Fail the build and print a warning if binutils v2.37 is detected and if we are using recordmcount. Cc: stable@vger.kernel.org Suggested-by: Joel Stanley Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20230530061436.56925-1-naveen@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a3f66ade09b3..912e64ab5f24 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -429,3 +429,11 @@ checkbin: echo -n '*** Please use a different binutils version.' ; \ false ; \ fi + @if test "x${CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT}" = "xy" -a \ + "x${CONFIG_LD_IS_BFD}" = "xy" -a \ + "${CONFIG_LD_VERSION}" = "23700" ; then \ + echo -n '*** binutils 2.37 drops unused section symbols, which recordmcount ' ; \ + echo 'is unable to handle.' ; \ + echo '*** Please use a different binutils version.' ; \ + false ; \ + fi From 3bd4d316b1a8d68b28216c807801b7b72a270ab9 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Wed, 14 Jun 2023 17:24:45 +0530 Subject: [PATCH 384/509] misc: fastrpc: Create fastrpc scalar with correct buffer count commit 0b4e32df3e09406b835d8230b9331273f2805058 upstream. A process can spawn a PD on DSP with some attributes that can be associated with the PD during spawn and run. The invocation corresponding to the create request with attributes has total 4 buffers at the DSP side implementation. If this number is not correct, the invocation is expected to fail on DSP. Added change to use correct number of buffer count for creating fastrpc scalar. Fixes: d73f71c7c6ee ("misc: fastrpc: Add support for create remote init process") Cc: stable Tested-by: Ekansh Gupta Signed-off-by: Ekansh Gupta Message-ID: <1686743685-21715-1-git-send-email-quic_ekangupt@quicinc.com> Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 2488a9a67d18..eed047e971e7 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1106,7 +1106,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0); if (init.attrs) - sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 6, 0); + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 4, 0); err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, args); From 8c723eef989bc419585237daa467b787ddca5415 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 1 Jun 2023 19:23:41 +0800 Subject: [PATCH 385/509] erofs: fix compact 4B support for 16k block size commit 001b8ccd0650727e54ec16ef72bf1b8eeab7168e upstream. In compact 4B, two adjacent lclusters are packed together as a unit to form on-disk indexes for effective random access, as below: (amortized = 4, vcnt = 2) _____________________________________________ |___@_____ encoded bits __________|_ blkaddr _| 0 . amortized * vcnt = 8 . . . . amortized * vcnt - 4 = 4 . . .____________________________. |_type (2 bits)_|_clusterofs_| Therefore, encoded bits for each pack are 32 bits (4 bytes). IOWs, since each lcluster can get 16 bits for its type and clusterofs, the maximum supported lclustersize for compact 4B format is 16k (14 bits). Fix this to enable compact 4B format for 16k lclusters (blocks), which is tested on an arm64 server with 16k page size. Fixes: 152a333a5895 ("staging: erofs: add compacted compression indexes support") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20230601112341.56960-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman --- fs/erofs/zmap.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index f18194fd8d77..a5537a9f8f36 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -215,7 +215,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, int i; u8 *in, type; - if (1 << amortizedshift == 4) + if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits == 12) vcnt = 16; @@ -273,7 +273,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); - const unsigned int lclusterbits = vi->z_logical_clusterbits; const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) + vi->inode_isize + vi->xattr_isize, 8) + sizeof(struct z_erofs_map_header); @@ -283,9 +282,6 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, erofs_off_t pos; int err; - if (lclusterbits != 12) - return -EOPNOTSUPP; - if (lcn >= totalidx) return -EINVAL; From 8fbe951d65464db28005803b1fa1b091342289d3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 26 Jun 2023 15:50:14 +0800 Subject: [PATCH 386/509] MIPS: Loongson: Fix cpu_probe_loongson() again commit 65fee014dc41a774bcd94896f3fb380bc39d8dda upstream. Commit 7db5e9e9e5e6c10d7d ("MIPS: loongson64: fix FTLB configuration") move decode_configs() from the beginning of cpu_probe_loongson() to the end in order to fix FTLB configuration. However, it breaks the CPUCFG decoding because decode_configs() use "c->options = xxxx" rather than "c->options |= xxxx", all information get from CPUCFG by decode_cpucfg() is lost. This causes error when creating a KVM guest on Loongson-3A4000: Exception Code: 4 not handled @ PC: 0000000087ad5981, inst: 0xcb7a1898 BadVaddr: 0x0 Status: 0x0 Fix this by moving the c->cputype setting to the beginning and moving decode_configs() after that. Fixes: 7db5e9e9e5e6c10d7d ("MIPS: loongson64: fix FTLB configuration") Cc: stable@vger.kernel.org Cc: Huang Pei Signed-off-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/cpu-probe.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index d120201910ac..f8d1933bfe82 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1721,7 +1721,10 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c) static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { + c->cputype = CPU_LOONGSON64; + /* All Loongson processors covered here define ExcCode 16 as GSExc. */ + decode_configs(c); c->options |= MIPS_CPU_GSEXCEX; switch (c->processor_id & PRID_IMP_MASK) { @@ -1731,7 +1734,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) case PRID_REV_LOONGSON2K_R1_1: case PRID_REV_LOONGSON2K_R1_2: case PRID_REV_LOONGSON2K_R1_3: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "Loongson-2K"; set_elf_platform(cpu, "gs264e"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1744,14 +1746,12 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); break; case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1771,7 +1771,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ break; case PRID_IMP_LOONGSON_64G: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1781,8 +1780,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) panic("Unknown Loongson Processor ID!"); break; } - - decode_configs(c); } #else static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } From be99faf0c4dbf99c6c7d4097a23c1ebb2b6192f9 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Wed, 15 Mar 2023 09:31:23 +0800 Subject: [PATCH 387/509] ext4: Fix reusing stale buffer heads from last failed mounting commit 26fb5290240dc31cae99b8b4dd2af7f46dfcba6b upstream. Following process makes ext4 load stale buffer heads from last failed mounting in a new mounting operation: mount_bdev ext4_fill_super | ext4_load_and_init_journal | ext4_load_journal | jbd2_journal_load | load_superblock | journal_get_superblock | set_buffer_verified(bh) // buffer head is verified | jbd2_journal_recover // failed caused by EIO | goto failed_mount3a // skip 'sb->s_root' initialization deactivate_locked_super kill_block_super generic_shutdown_super if (sb->s_root) // false, skip ext4_put_super->invalidate_bdev-> // invalidate_mapping_pages->mapping_evict_folio-> // filemap_release_folio->try_to_free_buffers, which // cannot drop buffer head. blkdev_put blkdev_put_whole if (atomic_dec_and_test(&bdev->bd_openers)) // false, systemd-udev happens to open the device. Then // blkdev_flush_mapping->kill_bdev->truncate_inode_pages-> // truncate_inode_folio->truncate_cleanup_folio-> // folio_invalidate->block_invalidate_folio-> // filemap_release_folio->try_to_free_buffers will be skipped, // dropping buffer head is missed again. Second mount: ext4_fill_super ext4_load_and_init_journal ext4_load_journal ext4_get_journal jbd2_journal_init_inode journal_init_common bh = getblk_unmovable bh = __find_get_block // Found stale bh in last failed mounting journal->j_sb_buffer = bh jbd2_journal_load load_superblock journal_get_superblock if (buffer_verified(bh)) // true, skip journal->j_format_version = 2, value is 0 jbd2_journal_recover do_one_pass next_log_block += count_tags(journal, bh) // According to journal_tag_bytes(), 'tag_bytes' calculating is // affected by jbd2_has_feature_csum3(), jbd2_has_feature_csum3() // returns false because 'j->j_format_version >= 2' is not true, // then we get wrong next_log_block. The do_one_pass may exit // early whenoccuring non JBD2_MAGIC_NUMBER in 'next_log_block'. The filesystem is corrupted here, journal is partially replayed, and new journal sequence number actually is already used by last mounting. The invalidate_bdev() can drop all buffer heads even racing with bare reading block device(eg. systemd-udev), so we can fix it by invalidating bdev in error handling path in __ext4_fill_super(). Fetch a reproducer in [Link]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217171 Fixes: 25ed6e8a54df ("jbd2: enable journal clients to enable v2 checksumming") Cc: stable@vger.kernel.org # v3.5 Signed-off-by: Zhihao Cheng Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230315013128.3911115-2-chengzhihao1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 84b4fc9833e3..e386d67cff9d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1091,6 +1091,12 @@ static void ext4_blkdev_remove(struct ext4_sb_info *sbi) struct block_device *bdev; bdev = sbi->s_journal_bdev; if (bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. + */ + invalidate_bdev(bdev); ext4_blkdev_put(bdev); sbi->s_journal_bdev = NULL; } @@ -1230,13 +1236,7 @@ static void ext4_put_super(struct super_block *sb) sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) { - /* - * Invalidate the journal device's buffers. We don't want them - * floating about in memory - the physical journal device may - * hotswapped, and it breaks the `ro-after' testing code. - */ sync_blockdev(sbi->s_journal_bdev); - invalidate_bdev(sbi->s_journal_bdev); ext4_blkdev_remove(sbi); } @@ -5206,6 +5206,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); ext4_blkdev_remove(sbi); out_fail: + invalidate_bdev(sb->s_bdev); sb->s_fs_info = NULL; kfree(sbi->s_blockgroup_lock); out_free_base: From d054422eb609da97699d900959bc9a351761f313 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:18 +0800 Subject: [PATCH 388/509] ext4: fix wrong unit use in ext4_mb_clear_bb commit 247c3d214c23dfeeeb892e91a82ac1188bdaec9f upstream. Function ext4_issue_discard need count in cluster. Pass count_clusters instead of count to fix the mismatch. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-11-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8a51448b7670..d847d836d83e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5549,8 +5549,8 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode, * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count, - NULL); + err = ext4_issue_discard(sb, block_group, bit, + count_clusters, NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%u block:%d count:%lu failed" From 514220246aa8aa6394bba5e1ab35c8e40a7b566e Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:16 +0800 Subject: [PATCH 389/509] ext4: get block from bh in ext4_free_blocks for fast commit replay commit 11b6890be0084ad4df0e06d89a9fdcc948472c65 upstream. ext4_free_blocks will retrieve block from bh if block parameter is zero. Retrieve block before ext4_free_blocks_simple to avoid potentially passing wrong block to ext4_free_blocks_simple. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-9-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d847d836d83e..f0483306eb8d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5634,12 +5634,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); - if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); if (bh) { if (block) BUG_ON(block != bh->b_blocknr); @@ -5647,6 +5641,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, block = bh->b_blocknr; } + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks not in datazone - " From 0a5d12e7107e2ba2c27f57cd33bb2b3380324be5 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:19 +0800 Subject: [PATCH 390/509] ext4: fix wrong unit use in ext4_mb_new_blocks commit 2ec6d0a5ea72689a79e6f725fd8b443a788ae279 upstream. Function ext4_free_blocks_simple needs count in cluster. Function ext4_free_blocks accepts count in block. Convert count to cluster to fix the mismatch. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-12-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f0483306eb8d..3a7192884671 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5642,7 +5642,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); + ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count)); return; } From 84293af5455b17bc674510c962e33f6a7bdd2f97 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 24 Apr 2023 11:38:35 +0800 Subject: [PATCH 391/509] ext4: only update i_reserved_data_blocks on successful block allocation commit de25d6e9610a8b30cce9bbb19b50615d02ebca02 upstream. In our fault injection test, we create an ext4 file, migrate it to non-extent based file, then punch a hole and finally trigger a WARN_ON in the ext4_da_update_reserve_space(): EXT4-fs warning (device sda): ext4_da_update_reserve_space:369: ino 14, used 11 with only 10 reserved data blocks When writing back a non-extent based file, if we enable delalloc, the number of reserved blocks will be subtracted from the number of blocks mapped by ext4_ind_map_blocks(), and the extent status tree will be updated. We update the extent status tree by first removing the old extent_status and then inserting the new extent_status. If the block range we remove happens to be in an extent, then we need to allocate another extent_status with ext4_es_alloc_extent(). use old to remove to add new |----------|------------|------------| old extent_status The problem is that the allocation of a new extent_status failed due to a fault injection, and __es_shrink() did not get free memory, resulting in a return of -ENOMEM. Then do_writepages() retries after receiving -ENOMEM, we map to the same extent again, and the number of reserved blocks is again subtracted from the number of blocks in that extent. Since the blocks in the same extent are subtracted twice, we end up triggering WARN_ON at ext4_da_update_reserve_space() because used > ei->i_reserved_data_blocks. For non-extent based file, we update the number of reserved blocks after ext4_ind_map_blocks() is executed, which causes a problem that when we call ext4_ind_map_blocks() to create a block, it doesn't always create a block, but we always reduce the number of reserved blocks. So we move the logic for updating reserved blocks to ext4_ind_map_blocks() to ensure that the number of reserved blocks is updated only after we do succeed in allocating some new blocks. Fixes: 5f634d064c70 ("ext4: Fix quota accounting error with fallocate") Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230424033846.4732-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/indirect.c | 8 ++++++++ fs/ext4/inode.c | 10 ---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 237983cd8cdc..c2bb2ff3fbb6 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -649,6 +649,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ext4_update_inode_fsync_trans(handle, inode, 1); count = ar.len; + + /* + * Update reserved blocks/metadata blocks after successful block + * allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, count, 1); + got_it: map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = le32_to_cpu(chain[depth-1].key); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d3143746bcdb..365c4d3a434a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -654,16 +654,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, */ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); } - - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. We don't - * support fallocate for non extent files. So we can update - * reserve space here. - */ - if ((retval > 0) && - (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) - ext4_da_update_reserve_space(inode, retval, 1); } if (retval > 0) { From 47b7eaae08e8b2f25bdf37bc14d21be090bcb20f Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 20 Jun 2023 22:17:00 +0530 Subject: [PATCH 392/509] jfs: jfs_dmap: Validate db_l2nbperpage while mounting commit 11509910c599cbd04585ec35a6d5e1a0053d84c1 upstream. In jfs_dmap.c at line 381, BLKTODMAP is used to get a logical block number inside dbFree(). db_l2nbperpage, which is the log2 number of blocks per page, is passed as an argument to BLKTODMAP which uses it for shifting. Syzbot reported a shift out-of-bounds crash because db_l2nbperpage is too big. This happens because the large value is set without any validation in dbMount() at line 181. Thus, make sure that db_l2nbperpage is correct while mounting. Max number of blocks per page = Page size / Min block size => log2(Max num_block per page) = log2(Page size / Min block size) = log2(Page size) - log2(Min block size) => Max db_l2nbperpage = L2PSIZE - L2MINBLOCKSIZE Reported-and-tested-by: syzbot+d2cd27dcf8e04b232eb2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?id=2a70a453331db32ed491f5cbb07e81bf2d225715 Cc: stable@vger.kernel.org Suggested-by: Dave Kleikamp Signed-off-by: Siddh Raman Pant Signed-off-by: Dave Kleikamp Signed-off-by: Greg Kroah-Hartman --- fs/jfs/jfs_dmap.c | 6 ++++++ fs/jfs/jfs_filsys.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 501263355ef4..bd9af2be352f 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -178,7 +178,13 @@ int dbMount(struct inode *ipbmap) dbmp_le = (struct dbmap_disk *) mp->data; bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); + bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); + if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) { + err = -EINVAL; + goto err_release_metapage; + } + bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); if (!bmp->db_numag) { err = -EINVAL; diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b5d702df7111..33ef13a0b110 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -122,7 +122,9 @@ #define NUM_INODE_PER_IAG INOSPERIAG #define MINBLOCKSIZE 512 +#define L2MINBLOCKSIZE 9 #define MAXBLOCKSIZE 4096 +#define L2MAXBLOCKSIZE 12 #define MAXFILESIZE ((s64)1 << 52) #define JFS_LINK_MAX 0xffffffff From 5a89a5cc817e924fa1705f0cbd0e320638f0fc01 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 15 Jun 2023 15:49:59 +0100 Subject: [PATCH 393/509] hwrng: imx-rngc - fix the timeout for init and self check commit d744ae7477190967a3ddc289e2cd4ae59e8b1237 upstream. Fix the timeout that is used for the initialisation and for the self test. wait_for_completion_timeout expects a timeout in jiffies, but RNGC_TIMEOUT is in milliseconds. Call msecs_to_jiffies to do the conversion. Cc: stable@vger.kernel.org Fixes: 1d5449445bd0 ("hwrng: mx-rngc - add a driver for Freescale RNGC") Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/imx-rngc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 9b182e5bfa87..dbb2da630611 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -110,7 +110,7 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); - ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); + ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT)); imx_rngc_irq_mask_clear(rngc); if (!ret) return -ETIMEDOUT; @@ -187,9 +187,7 @@ static int imx_rngc_init(struct hwrng *rng) cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); - ret = wait_for_completion_timeout(&rngc->rng_op_done, - RNGC_TIMEOUT); - + ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT)); if (!ret) { ret = -ETIMEDOUT; goto err; From d3bab5de91c62a845ca328834e7805122d48a60b Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 14 Jun 2023 09:42:53 +0200 Subject: [PATCH 394/509] PCI/PM: Avoid putting EloPOS E2/S2/H2 PCIe Ports in D3cold commit 9e30fd26f43b89cb6b4e850a86caa2e50dedb454 upstream. The quirk for Elo i2 introduced in commit 92597f97a40b ("PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold") is also needed by EloPOS E2/S2/H2 which uses the same Continental Z2 board. Change the quirk to match the board instead of system. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215715 Link: https://lore.kernel.org/r/20230614074253.22318-1-linux@zary.sk Signed-off-by: Ondrej Zary Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d37013d007b6..1f8106ec7094 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2830,13 +2830,13 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { { /* * Downstream device is not accessible after putting a root port - * into D3cold and back into D0 on Elo i2. + * into D3cold and back into D0 on Elo Continental Z2 board */ - .ident = "Elo i2", + .ident = "Elo Continental Z2", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), - DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), - DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + DMI_MATCH(DMI_BOARD_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_BOARD_NAME, "Geminilake"), + DMI_MATCH(DMI_BOARD_VERSION, "Continental Z2"), }, }, #endif From aca71b004a66475fa03172b1a8ee62a5590b0d88 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 7 Jun 2023 18:18:47 +0100 Subject: [PATCH 395/509] PCI: Add function 1 DMA alias quirk for Marvell 88SE9235 commit 88d341716b83abd355558523186ca488918627ee upstream. Marvell's own product brief implies the 92xx series are a closely related family, and sure enough it turns out that 9235 seems to need the same quirk as the other three, although possibly only when certain ports are used. Link: https://lore.kernel.org/linux-iommu/2a699a99-545c-1324-e052-7d2f41fed1ae@yahoo.co.uk/ Link: https://lore.kernel.org/r/731507e05d70239aec96fcbfab6e65d8ce00edd2.1686157165.git.robin.murphy@arm.com Reported-by: Jason Adriaanse Signed-off-by: Robin Murphy Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index c1ebd5e12b06..c0d113481191 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4123,6 +4123,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9235, + quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645, From 48e11e7c81b91002a120a513312a4de9f5ba7f08 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 19 Jun 2023 20:34:00 +0530 Subject: [PATCH 396/509] PCI: qcom: Disable write access to read only registers for IP v2.3.3 commit a33d700e8eea76c62120cb3dbf5e01328f18319a upstream. In the post init sequence of v2.9.0, write access to read only registers are not disabled after updating the registers. Fix it by disabling the access after register update. Link: https://lore.kernel.org/r/20230619150408.8468-2-manivannan.sadhasivam@linaro.org Fixes: 5d76117f070d ("PCI: qcom: Add support for IPQ8074 PCIe controller") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index c68e14271c02..737cc9d6fa6a 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -771,6 +771,8 @@ static int qcom_pcie_get_resources_2_4_0(struct qcom_pcie *pcie) return PTR_ERR(res->phy_ahb_reset); } + dw_pcie_dbi_ro_wr_dis(pci); + return 0; } From bec3e0f7f2724141ba5ad9ef952fc144a88ebf08 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:50 +0200 Subject: [PATCH 397/509] PCI: rockchip: Assert PCI Configuration Enable bit after probe commit f397fd4ac1fa3afcabd8cee030f953ccaed2a364 upstream. Assert PCI Configuration Enable bit after probe. When this bit is left to 0 in the endpoint mode, the RK3399 PCIe endpoint core will generate configuration request retry status (CRS) messages back to the root complex. Assert this bit after probe to allow the RK3399 PCIe endpoint core to reply to configuration requests from the root complex. This is documented in section 17.5.8.1.2 of the RK3399 TRM. Link: https://lore.kernel.org/r/20230418074700.1083505-4-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 379cde59988c..8af1b9b9e8db 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -631,6 +631,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; + rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, + PCIE_CLIENT_CONFIG); + return 0; err_epc_mem_exit: pci_epc_mem_exit(epc); From ddda61419af341ad9ee033d66f24970b391c56f2 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:49 +0200 Subject: [PATCH 398/509] PCI: rockchip: Write PCI Device ID to correct register commit 1f1c42ece18de365c976a060f3c8eb481b038e3a upstream. Write PCI Device ID (DID) to the correct register. The Device ID was not updated through the correct register. Device ID was written to a read-only register and therefore did not work. The Device ID is now set through the correct register. This is documented in the RK3399 TRM section 17.6.6.1.1 Link: https://lore.kernel.org/r/20230418074700.1083505-3-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 6 ++++-- drivers/pci/controller/pcie-rockchip.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 8af1b9b9e8db..2e28ed780acd 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -125,6 +125,7 @@ static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn, static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, struct pci_epf_header *hdr) { + u32 reg; struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; @@ -137,8 +138,9 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, PCIE_CORE_CONFIG_VENDOR); } - rockchip_pcie_write(rockchip, hdr->deviceid << 16, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + PCI_VENDOR_ID); + reg = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_DID_VID); + reg = (reg & 0xFFFF) | (hdr->deviceid << 16); + rockchip_pcie_write(rockchip, reg, PCIE_EP_CONFIG_DID_VID); rockchip_pcie_write(rockchip, hdr->revid | diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index c7d0178fc8c2..173f47d4fa7b 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -132,6 +132,8 @@ #define PCIE_RC_RP_ATS_BASE 0x400000 #define PCIE_RC_CONFIG_NORMAL_BASE 0x800000 #define PCIE_RC_CONFIG_BASE 0xa00000 +#define PCIE_EP_CONFIG_BASE 0xa00000 +#define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) #define PCIE_RC_CONFIG_SCC_SHIFT 16 #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) From c417a4c7de1d5112a13ffc304a0a29bf7bd1107f Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:51 +0200 Subject: [PATCH 399/509] PCI: rockchip: Add poll and timeout to wait for PHY PLLs to be locked commit 9dd3c7c4c8c3f7f010d9cdb7c3f42506d93c9527 upstream. The RK3399 PCIe controller should wait until the PHY PLLs are locked. Add poll and timeout to wait for PHY PLLs to be locked. If they cannot be locked generate error message and jump to error handler. Accessing registers in the PHY clock domain when PLLs are not locked causes hang The PHY PLLs status is checked through a side channel register. This is documented in the TRM section 17.5.8.1 "PCIe Initialization Sequence". Link: https://lore.kernel.org/r/20230418074700.1083505-5-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip.c | 17 +++++++++++++++++ drivers/pci/controller/pcie-rockchip.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index 990a00e08bc5..1aa84035a8bc 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,12 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) } EXPORT_SYMBOL_GPL(rockchip_pcie_parse_dt); +#define rockchip_pcie_read_addr(addr) rockchip_pcie_read(rockchip, addr) +/* 100 ms max wait time for PHY PLLs to lock */ +#define RK_PHY_PLL_LOCK_TIMEOUT_US 100000 +/* Sleep should be less than 20ms */ +#define RK_PHY_PLL_LOCK_SLEEP_US 1000 + int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) { struct device *dev = rockchip->dev; @@ -254,6 +261,16 @@ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) } } + err = readx_poll_timeout(rockchip_pcie_read_addr, + PCIE_CLIENT_SIDE_BAND_STATUS, + regs, !(regs & PCIE_CLIENT_PHY_ST), + RK_PHY_PLL_LOCK_SLEEP_US, + RK_PHY_PLL_LOCK_TIMEOUT_US); + if (err) { + dev_err(dev, "PHY PLLs could not lock, %d\n", err); + goto err_power_off_phy; + } + /* * Please don't reorder the deassert sequence of the following * four reset pins. diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 173f47d4fa7b..3503fdc266c6 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -37,6 +37,8 @@ #define PCIE_CLIENT_MODE_EP HIWORD_UPDATE(0x0040, 0) #define PCIE_CLIENT_GEN_SEL_1 HIWORD_UPDATE(0x0080, 0) #define PCIE_CLIENT_GEN_SEL_2 HIWORD_UPDATE_BIT(0x0080) +#define PCIE_CLIENT_SIDE_BAND_STATUS (PCIE_CLIENT_BASE + 0x20) +#define PCIE_CLIENT_PHY_ST BIT(12) #define PCIE_CLIENT_DEBUG_OUT_0 (PCIE_CLIENT_BASE + 0x3c) #define PCIE_CLIENT_DEBUG_LTSSM_MASK GENMASK(5, 0) #define PCIE_CLIENT_DEBUG_LTSSM_L1 0x18 From 36eb13031227e117ccedbdd090032fd8508e97f4 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:54 +0200 Subject: [PATCH 400/509] PCI: rockchip: Fix legacy IRQ generation for RK3399 PCIe endpoint core commit 166e89d99dd85a856343cca51eee781b793801f2 upstream. Fix legacy IRQ generation for RK3399 PCIe endpoint core according to the technical reference manual (TRM). Assert and deassert legacy interrupt (INTx) through the legacy interrupt control register ("PCIE_CLIENT_LEGACY_INT_CTRL") instead of manually generating a PCIe message. The generation of the legacy interrupt was tested and validated with the PCIe endpoint test driver. Link: https://lore.kernel.org/r/20230418074700.1083505-8-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 45 ++++++----------------- drivers/pci/controller/pcie-rockchip.h | 6 ++- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 2e28ed780acd..f929b9dbb7e4 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -347,48 +347,25 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn) } static void rockchip_pcie_ep_assert_intx(struct rockchip_pcie_ep *ep, u8 fn, - u8 intx, bool is_asserted) + u8 intx, bool do_assert) { struct rockchip_pcie *rockchip = &ep->rockchip; - u32 r = ep->max_regions - 1; - u32 offset; - u32 status; - u8 msg_code; - - if (unlikely(ep->irq_pci_addr != ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR || - ep->irq_pci_fn != fn)) { - rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r, - AXI_WRAPPER_NOR_MSG, - ep->irq_phys_addr, 0, 0); - ep->irq_pci_addr = ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR; - ep->irq_pci_fn = fn; - } intx &= 3; - if (is_asserted) { + + if (do_assert) { ep->irq_pending |= BIT(intx); - msg_code = ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA + intx; + rockchip_pcie_write(rockchip, + PCIE_CLIENT_INT_IN_ASSERT | + PCIE_CLIENT_INT_PEND_ST_PEND, + PCIE_CLIENT_LEGACY_INT_CTRL); } else { ep->irq_pending &= ~BIT(intx); - msg_code = ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTA + intx; + rockchip_pcie_write(rockchip, + PCIE_CLIENT_INT_IN_DEASSERT | + PCIE_CLIENT_INT_PEND_ST_NORMAL, + PCIE_CLIENT_LEGACY_INT_CTRL); } - - status = rockchip_pcie_read(rockchip, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + - ROCKCHIP_PCIE_EP_CMD_STATUS); - status &= ROCKCHIP_PCIE_EP_CMD_STATUS_IS; - - if ((status != 0) ^ (ep->irq_pending != 0)) { - status ^= ROCKCHIP_PCIE_EP_CMD_STATUS_IS; - rockchip_pcie_write(rockchip, status, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + - ROCKCHIP_PCIE_EP_CMD_STATUS); - } - - offset = - ROCKCHIP_PCIE_MSG_ROUTING(ROCKCHIP_PCIE_MSG_ROUTING_LOCAL_INTX) | - ROCKCHIP_PCIE_MSG_CODE(msg_code) | ROCKCHIP_PCIE_MSG_NO_DATA; - writel(0, ep->irq_cpu_addr + offset); } static int rockchip_pcie_ep_send_legacy_irq(struct rockchip_pcie_ep *ep, u8 fn, diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 3503fdc266c6..b714cc2410d0 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -37,6 +37,11 @@ #define PCIE_CLIENT_MODE_EP HIWORD_UPDATE(0x0040, 0) #define PCIE_CLIENT_GEN_SEL_1 HIWORD_UPDATE(0x0080, 0) #define PCIE_CLIENT_GEN_SEL_2 HIWORD_UPDATE_BIT(0x0080) +#define PCIE_CLIENT_LEGACY_INT_CTRL (PCIE_CLIENT_BASE + 0x0c) +#define PCIE_CLIENT_INT_IN_ASSERT HIWORD_UPDATE_BIT(0x0002) +#define PCIE_CLIENT_INT_IN_DEASSERT HIWORD_UPDATE(0x0002, 0) +#define PCIE_CLIENT_INT_PEND_ST_PEND HIWORD_UPDATE_BIT(0x0001) +#define PCIE_CLIENT_INT_PEND_ST_NORMAL HIWORD_UPDATE(0x0001, 0) #define PCIE_CLIENT_SIDE_BAND_STATUS (PCIE_CLIENT_BASE + 0x20) #define PCIE_CLIENT_PHY_ST BIT(12) #define PCIE_CLIENT_DEBUG_OUT_0 (PCIE_CLIENT_BASE + 0x3c) @@ -234,7 +239,6 @@ #define ROCKCHIP_PCIE_EP_MSI_CTRL_ME BIT(16) #define ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP BIT(24) #define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1 -#define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3 #define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) (((fn) << 12) & GENMASK(19, 12)) #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \ (PCIE_RC_RP_ATS_BASE + 0x0840 + (fn) * 0x0040 + (bar) * 0x0008) From f1986416cfb4474651916f87af948da581b1bccf Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:56 +0200 Subject: [PATCH 401/509] PCI: rockchip: Use u32 variable to access 32-bit registers commit 8962b2cb39119cbda4fc69a1f83957824f102f81 upstream. Previously u16 variables were used to access 32-bit registers, this resulted in not all of the data being read from the registers. Also the left shift of more than 16-bits would result in moving data out of the variable. Use u32 variables to access 32-bit registers Link: https://lore.kernel.org/r/20230418074700.1083505-10-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 10 +++++----- drivers/pci/controller/pcie-rockchip.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index f929b9dbb7e4..a199ac2c7d5b 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -314,15 +314,15 @@ static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags; + u32 flags; flags = rockchip_pcie_read(rockchip, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK; flags |= - ((multi_msg_cap << 1) << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) | - PCI_MSI_FLAGS_64BIT; + (multi_msg_cap << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) | + (PCI_MSI_FLAGS_64BIT << ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET); flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP; rockchip_pcie_write(rockchip, flags, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + @@ -334,7 +334,7 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn) { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags; + u32 flags; flags = rockchip_pcie_read(rockchip, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + @@ -395,7 +395,7 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, u8 interrupt_num) { struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags, mme, data, data_mask; + u32 flags, mme, data, data_mask; u8 msi_count; u64 pci_addr, pci_addr_mask = 0xff; diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index b714cc2410d0..76a5f96bfd0a 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -232,6 +232,7 @@ #define ROCKCHIP_PCIE_EP_CMD_STATUS 0x4 #define ROCKCHIP_PCIE_EP_CMD_STATUS_IS BIT(19) #define ROCKCHIP_PCIE_EP_MSI_CTRL_REG 0x90 +#define ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET 16 #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET 17 #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK GENMASK(19, 17) #define ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET 20 From 8c90c466e38e2002e358445d62f927c1805e0e52 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 18 Apr 2023 09:46:58 +0200 Subject: [PATCH 402/509] PCI: rockchip: Set address alignment for endpoint mode commit 7e6689b34a815bd379dfdbe9855d36f395ef056c upstream. The address translation unit of the rockchip EP controller does not use the lower 8 bits of a PCIe-space address to map local memory. Thus we must set the align feature field to 256 to let the user know about this constraint. Link: https://lore.kernel.org/r/20230418074700.1083505-12-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Signed-off-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index a199ac2c7d5b..dba8bdc3fc94 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -485,6 +485,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = false, .msi_capable = true, .msix_capable = false, + .align = 256, }; static const struct pci_epc_features* From cdf9a7e2cdc7a5464e3cc6d0b715ba2b1d215521 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:39 +0900 Subject: [PATCH 403/509] misc: pci_endpoint_test: Free IRQs before removing the device commit f61b7634a3249d12b9daa36ffbdb9965b6f24c6c upstream. In pci_endpoint_test_remove(), freeing the IRQs after removing the device creates a small race window for IRQs to be received with the test device memory already released, causing the IRQ handler to access invalid memory, resulting in an oops. Free the device IRQs before removing the device to avoid this issue. Link: https://lore.kernel.org/r/20230415023542.77601-15-dlemoal@kernel.org Fixes: e03327122e2c ("pci_endpoint_test: Add 2 ioctl commands") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 48eec5fe7397..ab54fa39b9b0 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -935,6 +935,9 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) if (id < 0) return; + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + misc_deregister(&test->miscdev); kfree(misc_device->name); kfree(test->name); @@ -944,9 +947,6 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) pci_iounmap(pdev, test->bar[bar]); } - pci_endpoint_test_release_irq(test); - pci_endpoint_test_free_irq_vectors(test); - pci_release_regions(pdev); pci_disable_device(pdev); } From 8e3c7776405a31400999160247bf9443c35c2a3b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:40 +0900 Subject: [PATCH 404/509] misc: pci_endpoint_test: Re-init completion for every test commit fb620ae73b70c2f57b9d3e911fc24c024ba2324f upstream. The irq_raised completion used to detect the end of a test case is initialized when the test device is probed, but never reinitialized again before a test case. As a result, the irq_raised completion synchronization is effective only for the first ioctl test case executed. Any subsequent call to wait_for_completion() by another ioctl() call will immediately return, potentially too early, leading to false positive failures. Fix this by reinitializing the irq_raised completion before starting a new ioctl() test command. Link: https://lore.kernel.org/r/20230415023542.77601-16-dlemoal@kernel.org Fixes: 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index ab54fa39b9b0..6c4c85eb7147 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -727,6 +727,10 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, struct pci_dev *pdev = test->pdev; mutex_lock(&test->mutex); + + reinit_completion(&test->irq_raised); + test->last_irq = -ENODATA; + switch (cmd) { case PCITEST_BAR: bar = arg; From 5e9aff5b10c2c98217217c3f14ccd10042e50bbe Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Jun 2023 14:05:23 -0400 Subject: [PATCH 405/509] md/raid0: add discard support for the 'original' layout commit e836007089ba8fdf24e636ef2b007651fb4582e6 upstream. We've found that using raid0 with the 'original' layout and discard enabled with different disk sizes (such that at least two zones are created) can result in data corruption. This is due to the fact that the discard handling in 'raid0_handle_discard()' assumes the 'alternate' layout. We've seen this corruption using ext4 but other filesystems are likely susceptible as well. More specifically, while multiple zones are necessary to create the corruption, the corruption may not occur with multiple zones if they layout in such a way the layout matches what the 'alternate' layout would have produced. Thus, not all raid0 devices with the 'original' layout, different size disks and discard enabled will encounter this corruption. The 3.14 kernel inadvertently changed the raid0 disk layout for different size disks. Thus, running a pre-3.14 kernel and post-3.14 kernel on the same raid0 array could corrupt data. This lead to the creation of the 'original' layout (to match the pre-3.14 layout) and the 'alternate' layout (to match the post 3.14 layout) in the 5.4 kernel time frame and an option to tell the kernel which layout to use (since it couldn't be autodetected). However, when the 'original' layout was added back to 5.4 discard support for the 'original' layout was not added leading this issue. I've been able to reliably reproduce the corruption with the following test case: 1. create raid0 array with different size disks using original layout 2. mkfs 3. mount -o discard 4. create lots of files 5. remove 1/2 the files 6. fstrim -a (or just the mount point for the raid0 array) 7. umount 8. fsck -fn /dev/md0 (spews all sorts of corruptions) Let's fix this by adding proper discard support to the 'original' layout. The fix 'maps' the 'original' layout disks to the order in which they are read/written such that we can compare the disks in the same way that the current 'alternate' layout does. A 'disk_shift' field is added to 'struct strip_zone'. This could be computed on the fly in raid0_handle_discard() but by adding this field, we save some computation in the discard path. Note we could also potentially fix this by re-ordering the disks in the zones that follow the first one, and then always read/writing them using the 'alternate' layout. However, that is seen as a more substantial change, and we are attempting the least invasive fix at this time to remedy the corruption. I've verified the change using the reproducer mentioned above. Typically, the corruption is seen after less than 3 iterations, while the patch has run 500+ iterations. Cc: NeilBrown Cc: Song Liu Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") Cc: stable@vger.kernel.org Signed-off-by: Jason Baron Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230623180523.1901230-1-jbaron@akamai.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid0.c | 62 ++++++++++++++++++++++++++++++++++++++++------ drivers/md/raid0.h | 1 + 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index a20332e755e8..ee2cfd6c2dfb 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -274,6 +274,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) goto abort; } + if (conf->layout == RAID0_ORIG_LAYOUT) { + for (i = 1; i < conf->nr_strip_zones; i++) { + sector_t first_sector = conf->strip_zone[i-1].zone_end; + + sector_div(first_sector, mddev->chunk_sectors); + zone = conf->strip_zone + i; + /* disk_shift is first disk index used in the zone */ + zone->disk_shift = sector_div(first_sector, + zone->nb_dev); + } + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -427,6 +439,20 @@ static void raid0_free(struct mddev *mddev, void *priv) kfree(conf); } +/* + * Convert disk_index to the disk order in which it is read/written. + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we + * write the disks starting at disk 3, then the read/write order would + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in + * that 'output' space to understand the read/write disk ordering. + */ +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) +{ + return ((disk_index + num_disks - disk_shift) % num_disks); +} + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) { struct r0conf *conf = mddev->private; @@ -440,7 +466,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t end_disk_offset; unsigned int end_disk_index; unsigned int disk; + sector_t orig_start, orig_end; + orig_start = start; zone = find_zone(conf, &start); if (bio_end_sector(bio) > zone->zone_end) { @@ -454,6 +482,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) } else end = bio_end_sector(bio); + orig_end = end; if (zone != conf->strip_zone) end = end - zone[-1].zone_end; @@ -465,13 +494,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) last_stripe_index = end; sector_div(last_stripe_index, stripe_size); - start_disk_index = (int)(start - first_stripe_index * stripe_size) / - mddev->chunk_sectors; + /* In the first zone the original and alternate layouts are the same */ + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { + sector_div(orig_start, mddev->chunk_sectors); + start_disk_index = sector_div(orig_start, zone->nb_dev); + start_disk_index = map_disk_shift(start_disk_index, + zone->nb_dev, + zone->disk_shift); + sector_div(orig_end, mddev->chunk_sectors); + end_disk_index = sector_div(orig_end, zone->nb_dev); + end_disk_index = map_disk_shift(end_disk_index, + zone->nb_dev, zone->disk_shift); + } else { + start_disk_index = (int)(start - first_stripe_index * stripe_size) / + mddev->chunk_sectors; + end_disk_index = (int)(end - last_stripe_index * stripe_size) / + mddev->chunk_sectors; + } start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % mddev->chunk_sectors) + first_stripe_index * mddev->chunk_sectors; - end_disk_index = (int)(end - last_stripe_index * stripe_size) / - mddev->chunk_sectors; end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % mddev->chunk_sectors) + last_stripe_index * mddev->chunk_sectors; @@ -480,18 +522,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t dev_start, dev_end; struct bio *discard_bio = NULL; struct md_rdev *rdev; + int compare_disk; - if (disk < start_disk_index) + compare_disk = map_disk_shift(disk, zone->nb_dev, + zone->disk_shift); + + if (compare_disk < start_disk_index) dev_start = (first_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > start_disk_index) + else if (compare_disk > start_disk_index) dev_start = first_stripe_index * mddev->chunk_sectors; else dev_start = start_disk_offset; - if (disk < end_disk_index) + if (compare_disk < end_disk_index) dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > end_disk_index) + else if (compare_disk > end_disk_index) dev_end = last_stripe_index * mddev->chunk_sectors; else dev_end = end_disk_offset; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 3816e5477db1..8cc761ca7423 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -6,6 +6,7 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ int nb_dev; /* # of devices attached to the zone */ + int disk_shift; /* start disk for the original layout */ }; /* Linux 3.14 (20d0189b101) made an unintended change to From 23d5004ee7aaa0688accdd5fea6e82a7e6e3d151 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:24 -0400 Subject: [PATCH 406/509] fs: dlm: return positive pid value for F_GETLK commit 92655fbda5c05950a411eaabc19e025e86e2a291 upstream. The GETLK pid values have all been negated since commit 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks"). Revert this for local pids, and leave in place negative pids for remote owners. Cc: stable@vger.kernel.org Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index a10d2bcfe75a..edce0b25cd90 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -363,7 +363,9 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, locks_init_lock(fl); fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; fl->fl_flags = FL_POSIX; - fl->fl_pid = -op->info.pid; + fl->fl_pid = op->info.pid; + if (op->info.nodeid != dlm_our_nodeid()) + fl->fl_pid = -fl->fl_pid; fl->fl_start = op->info.start; fl->fl_end = op->info.end; rv = 0; From b9ec9372a47a09377c50dc6eb8c3c8709d2790f8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 9 Jan 2023 17:18:16 -0800 Subject: [PATCH 407/509] drm/atomic: Allow vblank-enabled + self-refresh "disable" commit 9d0e3cac3517942a6e00eeecfe583a98715edb16 upstream. The self-refresh helper framework overloads "disable" to sometimes mean "go into self-refresh mode," and this mode activates automatically (e.g., after some period of unchanging display output). In such cases, the display pipe is still considered "on", and user-space is not aware that we went into self-refresh mode. Thus, users may expect that vblank-related features (such as DRM_IOCTL_WAIT_VBLANK) still work properly. However, we trigger the WARN_ONCE() here if a CRTC driver tries to leave vblank enabled. Add a different expectation: that CRTCs *should* leave vblank enabled when going into self-refresh. This patch is preparation for another patch -- "drm/rockchip: vop: Leave vblank enabled in self-refresh" -- which resolves conflicts between the above self-refresh behavior and the API tests in IGT's kms_vblank test module. == Some alternatives discussed: == It's likely that on many display controllers, vblank interrupts will turn off when the CRTC is disabled, and so in some cases, self-refresh may not support vblank. To support such cases, we might consider additions to the generic helpers such that we fire vblank events based on a timer. However, there is currently only one driver using the common self-refresh helpers (i.e., rockchip), and at least as of commit bed030a49f3e ("drm/rockchip: Don't fully disable vop on self refresh"), the CRTC hardware is powered enough to continue to generate vblank interrupts. So we chose the simpler option of leaving vblank interrupts enabled. We can reevaluate this decision and perhaps augment the helpers if/when we gain a second driver that has different requirements. v3: * include discussion summary v2: * add 'ret != 0' warning case for self-refresh * describe failing test case and relation to drm/rockchip patch better Cc: # dependency for "drm/rockchip: vop: Leave # vblank enabled in self-refresh" Signed-off-by: Brian Norris Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230109171809.v3.1.I3904f697863649eb1be540ecca147a66e42bfad7@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 7fc8e7000046..0fde260b7edd 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1113,7 +1113,16 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) continue; ret = drm_crtc_vblank_get(crtc); - WARN_ONCE(ret != -EINVAL, "driver forgot to call drm_crtc_vblank_off()\n"); + /* + * Self-refresh is not a true "disable"; ensure vblank remains + * enabled. + */ + if (new_crtc_state->self_refresh_active) + WARN_ONCE(ret != 0, + "driver disabled vblank in self-refresh\n"); + else + WARN_ONCE(ret != -EINVAL, + "driver forgot to call drm_crtc_vblank_off()\n"); if (ret == 0) drm_crtc_vblank_put(crtc); } From d89bd2ecd39bf30ff643ecee52e0955c2871efd3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 9 Jan 2023 17:18:17 -0800 Subject: [PATCH 408/509] drm/rockchip: vop: Leave vblank enabled in self-refresh commit 2bdba9d4a3baa758c2ca7f5b37b35c7b3391dc42 upstream. If we disable vblank when entering self-refresh, vblank APIs (like DRM_IOCTL_WAIT_VBLANK) no longer work. But user space is not aware when we enter self-refresh, so this appears to be an API violation -- that DRM_IOCTL_WAIT_VBLANK fails with EINVAL whenever the display is idle and enters self-refresh. The downstream driver used by many of these systems never used to disable vblank for PSR, and in fact, even upstream, we didn't do that until radically redesigning the state machine in commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"). Thus, it seems like a reasonable API fix to simply restore that behavior, and leave vblank enabled. Note that this appears to potentially unbalance the drm_crtc_vblank_{off,on}() calls in some cases, but: (a) drm_crtc_vblank_on() documents this as OK and (b) if I do the naive balancing, I find state machine issues such that we're not in sync properly; so it's easier to take advantage of (a). This issue was exposed by IGT's kms_vblank tests, and reported by KernelCI. The bug has been around a while (longer than KernelCI noticed), but was only exposed once self-refresh was bugfixed more recently, and so KernelCI could properly test it. Some other notes in: https://lore.kernel.org/dri-devel/Y6OCg9BPnJvimQLT@google.com/ Re: renesas/master bisection: igt-kms-rockchip.kms_vblank.pipe-A-wait-forked on rk3399-gru-kevin == Backporting notes: == Marking as 'Fixes' commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"), but it probably depends on commit bed030a49f3e ("drm/rockchip: Don't fully disable vop on self refresh") as well. We also need the previous patch ("drm/atomic: Allow vblank-enabled + self-refresh "disable""), of course. v3: * no update v2: * skip unnecessary lock/unlock Fixes: 6c836d965bad ("drm/rockchip: Use the helpers for PSR") Cc: Reported-by: "kernelci.org bot" Link: https://lore.kernel.org/dri-devel/Y5itf0+yNIQa6fU4@sirena.org.uk/ Signed-off-by: Brian Norris Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230109171809.v3.2.Ic07cba4ab9a7bd3618a9e4258b8f92ea7d10ae5a@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index af98bfcde518..65dde9df9793 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -702,13 +702,13 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc, if (crtc->state->self_refresh_active) rockchip_drm_set_win_enabled(crtc, false); + if (crtc->state->self_refresh_active) + goto out; + mutex_lock(&vop->vop_lock); drm_crtc_vblank_off(crtc); - if (crtc->state->self_refresh_active) - goto out; - /* * Vop standby will take effect at end of current frame, * if dsp hold valid irq happen, it means standby complete. @@ -740,9 +740,9 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc, vop_core_clks_disable(vop); pm_runtime_put(vop->dev); -out: mutex_unlock(&vop->vop_lock); +out: if (crtc->state->event && !crtc->state->active) { spin_lock_irq(&crtc->dev->event_lock); drm_crtc_send_vblank_event(crtc, crtc->state->event); From 4016d36fec633f8e48be5807f656f38d7f97b5d2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 23 Jun 2023 10:05:19 -0500 Subject: [PATCH 409/509] drm/amd/display: Correct `DMUB_FW_VERSION` macro commit 274d205cb59f43815542e04b42a9e6d0b9b95eff upstream. The `DMUB_FW_VERSION` macro has a mistake in that the revision field is off by one byte. The last byte is typically used for other purposes and not a revision. Cc: stable@vger.kernel.org Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Cc: Tsung-hua (Ryan) Lin Reviewed-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index c6a8d6c54621..882b4e2816b5 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -347,7 +347,7 @@ struct dmub_srv { * of a firmware to know if feature or functionality is supported or present. */ #define DMUB_FW_VERSION(major, minor, revision) \ - ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | ((revision) & 0xFFFF)) + ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | (((revision) & 0xFF) << 8)) /** * dmub_srv_create() - creates the DMUB service. From 08673739ed85bb4c465bcddf0c874a01033ac029 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Jun 2023 12:45:17 +0300 Subject: [PATCH 410/509] serial: atmel: don't enable IRQs prematurely commit 27a826837ec9a3e94cc44bd9328b8289b0fcecd7 upstream. The atmel_complete_tx_dma() function disables IRQs at the start of the function by calling spin_lock_irqsave(&port->lock, flags); There is no need to disable them a second time using the spin_lock_irq() function and, in fact, doing so is a bug because it will enable IRQs prematurely when we call spin_unlock_irq(). Just use spin_lock/unlock() instead without disabling or enabling IRQs. Fixes: 08f738be88bb ("serial: at91: add tx dma support") Signed-off-by: Dan Carpenter Reviewed-by: Jiri Slaby Acked-by: Richard Genoud Link: https://lore.kernel.org/r/cb7c39a9-c004-4673-92e1-be4e34b85368@moroto.mountain Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 02fd0e79c8f7..a1249bed6636 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -873,11 +873,11 @@ static void atmel_complete_tx_dma(void *arg) port->icount.tx += atmel_port->tx_len; - spin_lock_irq(&atmel_port->lock_tx); + spin_lock(&atmel_port->lock_tx); async_tx_ack(atmel_port->desc_tx); atmel_port->cookie_tx = -EINVAL; atmel_port->desc_tx = NULL; - spin_unlock_irq(&atmel_port->lock_tx); + spin_unlock(&atmel_port->lock_tx); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); From a49e5a05121c8bc471a57b4916c5393749c24de5 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 17:59:25 +0200 Subject: [PATCH 411/509] tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() in case of error commit a9c09546e903f1068acfa38e1ee18bded7114b37 upstream. If clk_get_rate() fails, the clk that has just been allocated needs to be freed. Cc: # v3.3+ Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andi Shyti Fixes: 5f5a7a5578c5 ("serial: samsung: switch to clkdev based clock lookup") Signed-off-by: Christophe JAILLET Reviewed-by: Jiri Slaby Message-ID: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 263c33260d8a..b14f918998d0 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -1313,8 +1313,12 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport, continue; rate = clk_get_rate(clk); - if (!rate) + if (!rate) { + dev_err(ourport->port.dev, + "Failed to get clock rate for %s.\n", clkname); + clk_put(clk); continue; + } if (ourport->info->has_divslot) { unsigned long div = rate / req_baud; From 1962717c4649e026a4252fe6625175affd28a593 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 17:59:26 +0200 Subject: [PATCH 412/509] tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() when iterating clk commit 832e231cff476102e8204a9e7bddfe5c6154a375 upstream. When the best clk is searched, we iterate over all possible clk. If we find a better match, the previous one, if any, needs to be freed. If a better match has already been found, we still need to free the new one, otherwise it leaks. Cc: # v3.3+ Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andi Shyti Fixes: 5f5a7a5578c5 ("serial: samsung: switch to clkdev based clock lookup") Signed-off-by: Christophe JAILLET Reviewed-by: Jiri Slaby Message-ID: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index b14f918998d0..fa5b1321d9b1 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -1344,10 +1344,18 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport, calc_deviation = -calc_deviation; if (calc_deviation < deviation) { + /* + * If we find a better clk, release the previous one, if + * any. + */ + if (!IS_ERR(*best_clk)) + clk_put(*best_clk); *best_clk = clk; best_quot = quot; *clk_num = cnt; deviation = calc_deviation; + } else { + clk_put(clk); } } From c04ed61ebf01968d7699b121663982493ed577fb Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 13 Jun 2023 16:15:21 -0500 Subject: [PATCH 413/509] firmware: stratix10-svc: Fix a potential resource leak in svc_create_memory_pool() commit 1995f15590ca222f91193ed11461862b450abfd6 upstream. svc_create_memory_pool() is only called from stratix10_svc_drv_probe(). Most of resources in the probe are managed, but not this memremap() call. There is also no memunmap() call in the file. So switch to devm_memremap() to avoid a resource leak. Cc: stable@vger.kernel.org Fixes: 7ca5ce896524 ("firmware: add Intel Stratix10 service layer driver") Link: https://lore.kernel.org/all/783e9dfbba34e28505c9efa8bba41f97fd0fa1dc.1686109400.git.christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Signed-off-by: Dinh Nguyen Message-ID: <20230613211521.16366-1-dinguyen@kernel.org> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/stratix10-svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 78a446cb4348..f432fe7cb60d 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -622,7 +622,7 @@ svc_create_memory_pool(struct platform_device *pdev, end = rounddown(sh_memory->addr + sh_memory->size, PAGE_SIZE); paddr = begin; size = end - begin; - va = memremap(paddr, size, MEMREMAP_WC); + va = devm_memremap(dev, paddr, size, MEMREMAP_WC); if (!va) { dev_err(dev, "fail to remap shared memory\n"); return ERR_PTR(-EINVAL); From 8e807eadf0b9654c8c5b2313a9ed72f68dbfbc21 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 28 Jun 2023 07:57:09 +0800 Subject: [PATCH 414/509] ceph: don't let check_caps skip sending responses for revoke msgs commit 257e6172ab36ebbe295a6c9ee9a9dd0fe54c1dc2 upstream. If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities. This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Reviewed-by: Patrick Donnelly Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e1fda3923944..432dc2a16e28 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3574,6 +3574,15 @@ static void handle_cap_grant(struct inode *inode, } BUG_ON(cap->issued & ~cap->implemented); + /* don't let check_caps skip sending a response to MDS for revoke msgs */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + cap->mds_wanted = 0; + if (cap == ci->i_auth_cap) + check_caps = 1; /* check auth cap only */ + else + check_caps = 2; /* check all caps */ + } + if (extra_info->inline_version > 0 && extra_info->inline_version >= ci->i_inline_version) { ci->i_inline_version = extra_info->inline_version; From b56a07c2a550025d6f3700b64fdd135b3e266726 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:06 +0300 Subject: [PATCH 415/509] xhci: Fix resume issue of some ZHAOXIN hosts commit f927728186f0de1167262d6a632f9f7e96433d1a upstream. On ZHAOXIN ZX-100 project, xHCI can't work normally after resume from system Sx state. To fix this issue, when resume from system Sx state, reinitialize xHCI instead of restore. So, Add XHCI_RESET_ON_RESUME quirk for ZX-100 to fix issue of resuming from system Sx state. Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-9-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index aff65cefead2..b71e29aad9cc 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -330,6 +330,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { + if (pdev->device == 0x9202) + xhci->quirks |= XHCI_RESET_ON_RESUME; + } + /* xHC spec requires PCI devices to support D3hot and D3cold */ if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; From c52e04c58dedfff62b066cacd6f6dd6423eccd99 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:07 +0300 Subject: [PATCH 416/509] xhci: Fix TRB prefetch issue of ZHAOXIN hosts commit 2a865a652299f5666f3b785cbe758c5f57453036 upstream. On some ZHAOXIN hosts, xHCI will prefetch TRB for performance improvement. However this TRB prefetch mechanism may cross page boundary, which may access memory not allocated by xHCI driver. In order to fix this issue, two pages was allocated for a segment and only the first page will be used. And add a quirk XHCI_ZHAOXIN_TRB_FETCH for this issue. Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-10-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 8 ++++++-- drivers/usb/host/xhci-pci.c | 7 ++++++- drivers/usb/host/xhci.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index a8a9addb4d25..cba05bb9ac37 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2472,8 +2472,12 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * and our use of dma addresses in the trb_address_map radix tree needs * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need. */ - xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, - TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size); + if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH) + xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, + TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2); + else + xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, + TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size); /* See Table 46 and Note on Figure 55 */ xhci->device_pool = dma_pool_create("xHCI input/output contexts", dev, diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index b71e29aad9cc..8b772c9df435 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -331,8 +331,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_NO_SOFT_RETRY; if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { - if (pdev->device == 0x9202) + if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + } + + if (pdev->device == 0x9203) + xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; } /* xHC spec requires PCI devices to support D3hot and D3cold */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c7749f6e3474..c8c873620583 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1895,6 +1895,7 @@ struct xhci_hcd { #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) +#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) unsigned int num_active_eps; unsigned int limit_active_eps; From 790e4e82c57d07fdc011f63db028005691a0b7ab Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:08 +0300 Subject: [PATCH 417/509] xhci: Show ZHAOXIN xHCI root hub speed correctly commit d9b0328d0b8b8298dfdc97cd8e0e2371d4bcc97b upstream. Some ZHAOXIN xHCI controllers follow usb3.1 spec, but only support gen1 speed 5Gbps. While in Linux kernel, if xHCI suspport usb3.1, root hub speed will show on 10Gbps. To fix this issue of ZHAOXIN xHCI platforms, read usb speed ID supported by xHCI to determine root hub speed. And add a quirk XHCI_ZHAOXIN_HOST for this issue. [fix warning about uninitialized symbol -Mathias] Suggested-by: Mathias Nyman Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-11-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 31 ++++++++++++++++++++++++------- drivers/usb/host/xhci-pci.c | 2 ++ drivers/usb/host/xhci.h | 1 + 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index cba05bb9ac37..390bdf823e08 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2146,7 +2146,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, { u32 temp, port_offset, port_count; int i; - u8 major_revision, minor_revision; + u8 major_revision, minor_revision, tmp_minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_port_cap *port_cap; @@ -2166,6 +2166,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, */ if (minor_revision > 0x00 && minor_revision < 0x10) minor_revision <<= 4; + /* + * Some zhaoxin's xHCI controller that follow usb3.1 spec + * but only support Gen1. + */ + if (xhci->quirks & XHCI_ZHAOXIN_HOST) { + tmp_minor_revision = minor_revision; + minor_revision = 0; + } + } else if (major_revision <= 0x02) { rhub = &xhci->usb2_rhub; } else { @@ -2175,10 +2184,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, /* Ignoring port protocol we can't understand. FIXME */ return; } - rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp); - - if (rhub->min_rev < minor_revision) - rhub->min_rev = minor_revision; /* Port offset and count in the third dword, see section 7.2 */ temp = readl(addr + 2); @@ -2197,8 +2202,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, if (xhci->num_port_caps > max_caps) return; - port_cap->maj_rev = major_revision; - port_cap->min_rev = minor_revision; port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); if (port_cap->psi_count) { @@ -2219,6 +2222,11 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) port_cap->psi_uid_count++; + if (xhci->quirks & XHCI_ZHAOXIN_HOST && + major_revision == 0x03 && + XHCI_EXT_PORT_PSIV(port_cap->psi[i]) >= 5) + minor_revision = tmp_minor_revision; + xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", XHCI_EXT_PORT_PSIV(port_cap->psi[i]), XHCI_EXT_PORT_PSIE(port_cap->psi[i]), @@ -2228,6 +2236,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, XHCI_EXT_PORT_PSIM(port_cap->psi[i])); } } + + rhub->maj_rev = major_revision; + + if (rhub->min_rev < minor_revision) + rhub->min_rev = minor_revision; + + port_cap->maj_rev = major_revision; + port_cap->min_rev = minor_revision; + /* cache usb2 port capabilities */ if (major_revision < 0x03 && xhci->num_ext_caps < max_caps) xhci->ext_caps[xhci->num_ext_caps++] = temp; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 8b772c9df435..8034e643a4af 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -331,6 +331,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_NO_SOFT_RETRY; if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { + xhci->quirks |= XHCI_ZHAOXIN_HOST; + if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c8c873620583..6a7c05940e66 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1896,6 +1896,7 @@ struct xhci_hcd { #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) +#define XHCI_ZHAOXIN_HOST BIT_ULL(46) unsigned int num_active_eps; unsigned int limit_active_eps; From 7f2f0e6ec561f6feeabefc1fc25e416d7cf05715 Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 6 Jun 2023 19:53:57 +0300 Subject: [PATCH 418/509] meson saradc: fix clock divider mask length commit c57fa0037024c92c2ca34243e79e857da5d2c0a9 upstream. According to the datasheets of supported meson SoCs length of ADC_CLK_DIV field is 6-bit. Although all supported SoCs have the register with that field documented later SoCs use external clock rather than ADC internal clock so this patch affects only meson8 family (S8* SoCs). Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Signed-off-by: George Stark Reviewed-by: Andy Shevchenko Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230606165357.42417-1-gnstark@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/meson_saradc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index e03988698755..e771299fac9d 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -71,7 +71,7 @@ #define MESON_SAR_ADC_REG3_PANEL_DETECT_COUNT_MASK GENMASK(20, 18) #define MESON_SAR_ADC_REG3_PANEL_DETECT_FILTER_TB_MASK GENMASK(17, 16) #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_SHIFT 10 - #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 5 + #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 6 #define MESON_SAR_ADC_REG3_BLOCK_DLY_SEL_MASK GENMASK(9, 8) #define MESON_SAR_ADC_REG3_BLOCK_DLY_MASK GENMASK(7, 0) From 5f4a1111ad04ad08d582f6452779b3371b227192 Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Mon, 19 Jun 2023 15:57:44 +0000 Subject: [PATCH 419/509] Revert "8250: add support for ASIX devices with a FIFO bug" commit a82d62f708545d22859584e0e0620da8e3759bbc upstream. This reverts commit eb26dfe8aa7eeb5a5aa0b7574550125f8aa4c3b3. Commit eb26dfe8aa7e ("8250: add support for ASIX devices with a FIFO bug") merged on Jul 13, 2012 adds a quirk for PCI_VENDOR_ID_ASIX (0x9710). But that ID is the same as PCI_VENDOR_ID_NETMOS defined in 1f8b061050c7 ("[PATCH] Netmos parallel/serial/combo support") merged on Mar 28, 2005. In pci_serial_quirks array, the NetMos entry always takes precedence over the ASIX entry even since it was initially merged, code in that commit is always unreachable. In my tests, adding the FIFO workaround to pci_netmos_init() makes no difference, and the vendor driver also does not have such workaround. Given that the code was never used for over a decade, it's safe to revert it. Also, the real PCI_VENDOR_ID_ASIX should be 0x125b, which is used on their newer AX99100 PCIe serial controllers released on 2016. The FIFO workaround should not be intended for these newer controllers, and it was never implemented in vendor driver. Fixes: eb26dfe8aa7e ("8250: add support for ASIX devices with a FIFO bug") Cc: stable Signed-off-by: Jiaqing Zhao Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230619155743.827859-1-jiaqing.zhao@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 1 - drivers/tty/serial/8250/8250_pci.c | 19 ------------------- drivers/tty/serial/8250/8250_port.c | 11 +++-------- include/linux/serial_8250.h | 1 - 4 files changed, 3 insertions(+), 29 deletions(-) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 0771cd226581..61b11490ae5b 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -87,7 +87,6 @@ struct serial8250_config { #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR (1 << 2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE (1 << 3) /* UART has buggy THRE reassertion */ -#define UART_BUG_PARITY (1 << 4) /* UART mishandles parity if FIFO enabled */ #define UART_BUG_TXRACE (1 << 5) /* UART Tx fails to set remote DR */ diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 9617f7ad332d..fd857d434326 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1044,14 +1044,6 @@ static int pci_oxsemi_tornado_init(struct pci_dev *dev) return number_uarts; } -static int pci_asix_setup(struct serial_private *priv, - const struct pciserial_board *board, - struct uart_8250_port *port, int idx) -{ - port->bugs |= UART_BUG_PARITY; - return pci_default_setup(priv, board, port, idx); -} - /* Quatech devices have their own extra interface features */ struct quatech_feature { @@ -1874,7 +1866,6 @@ pci_moxa_setup(struct serial_private *priv, #define PCI_DEVICE_ID_WCH_CH355_4S 0x7173 #define PCI_VENDOR_ID_AGESTAR 0x5372 #define PCI_DEVICE_ID_AGESTAR_9375 0x6872 -#define PCI_VENDOR_ID_ASIX 0x9710 #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e @@ -2684,16 +2675,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .exit = pci_wch_ch38x_exit, .setup = pci_wch_ch38x_setup, }, - /* - * ASIX devices with FIFO bug - */ - { - .vendor = PCI_VENDOR_ID_ASIX, - .device = PCI_ANY_ID, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_asix_setup, - }, /* * Broadcom TruManage (NetXtreme) */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index b19908779e3b..432a438929e6 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2577,11 +2577,8 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, if (c_cflag & CSTOPB) cval |= UART_LCR_STOP; - if (c_cflag & PARENB) { + if (c_cflag & PARENB) cval |= UART_LCR_PARITY; - if (up->bugs & UART_BUG_PARITY) - up->fifo_bug = true; - } if (!(c_cflag & PARODD)) cval |= UART_LCR_EPAR; #ifdef CMSPAR @@ -2744,8 +2741,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, up->lcr = cval; /* Save computed LCR */ if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { - /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */ - if ((baud < 2400 && !up->dma) || up->fifo_bug) { + if (baud < 2400 && !up->dma) { up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= UART_FCR_TRIGGER_1; } @@ -3081,8 +3077,7 @@ static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) struct uart_8250_port *up = up_to_u8250p(uport); int rxtrig; - if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || - up->fifo_bug) + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; rxtrig = bytes_to_fcr_rxtrig(up, bytes); diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 92f3b778d8c2..abb928361270 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -98,7 +98,6 @@ struct uart_8250_port { struct list_head list; /* ports on this IRQ */ u32 capabilities; /* port capabilities */ unsigned short bugs; /* port bugs */ - bool fifo_bug; /* min RX trigger if enabled */ unsigned int tx_loadsz; /* transmit fifo load size */ unsigned char acr; unsigned char fcr; From 07edd294b16a837714aa89885d3c621f23404892 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 22 Jun 2023 14:55:08 +0200 Subject: [PATCH 420/509] s390/decompressor: fix misaligned symbol build error commit 938f0c35d7d93a822ab9c9728e3205e8e57409d0 upstream. Nathan Chancellor reported a kernel build error on Fedora 39: $ clang --version | head -1 clang version 16.0.5 (Fedora 16.0.5-1.fc39) $ s390x-linux-gnu-ld --version | head -1 GNU ld version 2.40-1.fc39 $ make -skj"$(nproc)" ARCH=s390 CC=clang CROSS_COMPILE=s390x-linux-gnu- olddefconfig all s390x-linux-gnu-ld: arch/s390/boot/startup.o(.text+0x5b4): misaligned symbol `_decompressor_end' (0x35b0f) for relocation R_390_PC32DBL make[3]: *** [.../arch/s390/boot/Makefile:78: arch/s390/boot/vmlinux] Error 1 It turned out that the problem with misaligned symbols on s390 was fixed with commit 80ddf5ce1c92 ("s390: always build relocatable kernel") for the kernel image, but did not take into account that the decompressor uses its own set of CFLAGS, which come without -fPIE. Add the -fPIE flag also to the decompresser CFLAGS to fix this. Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Reported-by: CKI Suggested-by: Ulrich Weigand Link: https://github.com/ClangBuiltLinux/linux/issues/1747 Link: https://lore.kernel.org/32935.123062114500601371@us-mta-9.us.mimecast.lan/ Link: https://lore.kernel.org/r/20230622125508.1068457-1-hca@linux.ibm.com Cc: Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index a8cb00f30a7c..39ffcd4389f1 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -29,6 +29,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector +KBUILD_CFLAGS_DECOMPRESSOR += -fPIE KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) From 1576f0df7b4d1f82db588d6654b89d796fa06929 Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 12 Jul 2023 22:30:21 +0000 Subject: [PATCH 421/509] tracing/histograms: Add histograms to hist_vars if they have referenced variables commit 6018b585e8c6fa7d85d4b38d9ce49a5b67be7078 upstream. Hist triggers can have referenced variables without having direct variables fields. This can be the case if referenced variables are added for trigger actions. In this case the newly added references will not have field variables. Not taking such referenced variables into consideration can result in a bug where it would be possible to remove hist trigger with variables being refenced. This will result in a bug that is easily reproducable like so $ cd /sys/kernel/tracing $ echo 'synthetic_sys_enter char[] comm; long id' >> synthetic_events $ echo 'hist:keys=common_pid.execname,id.syscall:vals=hitcount:comm=common_pid.execname' >> events/raw_syscalls/sys_enter/trigger $ echo 'hist:keys=common_pid.execname,id.syscall:onmatch(raw_syscalls.sys_enter).synthetic_sys_enter($comm, id)' >> events/raw_syscalls/sys_enter/trigger $ echo '!hist:keys=common_pid.execname,id.syscall:vals=hitcount:comm=common_pid.execname' >> events/raw_syscalls/sys_enter/trigger [ 100.263533] ================================================================== [ 100.264634] BUG: KASAN: slab-use-after-free in resolve_var_refs+0xc7/0x180 [ 100.265520] Read of size 8 at addr ffff88810375d0f0 by task bash/439 [ 100.266320] [ 100.266533] CPU: 2 PID: 439 Comm: bash Not tainted 6.5.0-rc1 #4 [ 100.267277] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 [ 100.268561] Call Trace: [ 100.268902] [ 100.269189] dump_stack_lvl+0x4c/0x70 [ 100.269680] print_report+0xc5/0x600 [ 100.270165] ? resolve_var_refs+0xc7/0x180 [ 100.270697] ? kasan_complete_mode_report_info+0x80/0x1f0 [ 100.271389] ? resolve_var_refs+0xc7/0x180 [ 100.271913] kasan_report+0xbd/0x100 [ 100.272380] ? resolve_var_refs+0xc7/0x180 [ 100.272920] __asan_load8+0x71/0xa0 [ 100.273377] resolve_var_refs+0xc7/0x180 [ 100.273888] event_hist_trigger+0x749/0x860 [ 100.274505] ? kasan_save_stack+0x2a/0x50 [ 100.275024] ? kasan_set_track+0x29/0x40 [ 100.275536] ? __pfx_event_hist_trigger+0x10/0x10 [ 100.276138] ? ksys_write+0xd1/0x170 [ 100.276607] ? do_syscall_64+0x3c/0x90 [ 100.277099] ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 100.277771] ? destroy_hist_data+0x446/0x470 [ 100.278324] ? event_hist_trigger_parse+0xa6c/0x3860 [ 100.278962] ? __pfx_event_hist_trigger_parse+0x10/0x10 [ 100.279627] ? __kasan_check_write+0x18/0x20 [ 100.280177] ? mutex_unlock+0x85/0xd0 [ 100.280660] ? __pfx_mutex_unlock+0x10/0x10 [ 100.281200] ? kfree+0x7b/0x120 [ 100.281619] ? ____kasan_slab_free+0x15d/0x1d0 [ 100.282197] ? event_trigger_write+0xac/0x100 [ 100.282764] ? __kasan_slab_free+0x16/0x20 [ 100.283293] ? __kmem_cache_free+0x153/0x2f0 [ 100.283844] ? sched_mm_cid_remote_clear+0xb1/0x250 [ 100.284550] ? __pfx_sched_mm_cid_remote_clear+0x10/0x10 [ 100.285221] ? event_trigger_write+0xbc/0x100 [ 100.285781] ? __kasan_check_read+0x15/0x20 [ 100.286321] ? __bitmap_weight+0x66/0xa0 [ 100.286833] ? _find_next_bit+0x46/0xe0 [ 100.287334] ? task_mm_cid_work+0x37f/0x450 [ 100.287872] event_triggers_call+0x84/0x150 [ 100.288408] trace_event_buffer_commit+0x339/0x430 [ 100.289073] ? ring_buffer_event_data+0x3f/0x60 [ 100.292189] trace_event_raw_event_sys_enter+0x8b/0xe0 [ 100.295434] syscall_trace_enter.constprop.0+0x18f/0x1b0 [ 100.298653] syscall_enter_from_user_mode+0x32/0x40 [ 100.301808] do_syscall_64+0x1a/0x90 [ 100.304748] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 100.307775] RIP: 0033:0x7f686c75c1cb [ 100.310617] Code: 73 01 c3 48 8b 0d 65 3c 10 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 21 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 35 3c 10 00 f7 d8 64 89 01 48 [ 100.317847] RSP: 002b:00007ffc60137a38 EFLAGS: 00000246 ORIG_RAX: 0000000000000021 [ 100.321200] RAX: ffffffffffffffda RBX: 000055f566469ea0 RCX: 00007f686c75c1cb [ 100.324631] RDX: 0000000000000001 RSI: 0000000000000001 RDI: 000000000000000a [ 100.328104] RBP: 00007ffc60137ac0 R08: 00007f686c818460 R09: 000000000000000a [ 100.331509] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000009 [ 100.334992] R13: 0000000000000007 R14: 000000000000000a R15: 0000000000000007 [ 100.338381] We hit the bug because when second hist trigger has was created has_hist_vars() returned false because hist trigger did not have variables. As a result of that save_hist_vars() was not called to add the trigger to trace_array->hist_vars. Later on when we attempted to remove the first histogram find_any_var_ref() failed to detect it is being used because it did not find the second trigger in hist_vars list. With this change we wait until trigger actions are created so we can take into consideration if hist trigger has variable references. Also, now we check the return value of save_hist_vars() and fail trigger creation if save_hist_vars() fails. Link: https://lore.kernel.org/linux-trace-kernel/20230712223021.636335-1-mkhalfella@purestorage.com Cc: stable@vger.kernel.org Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers") Signed-off-by: Mohamed Khalfella Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 9ed65191888e..ae71cefb46bb 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5817,13 +5817,15 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, if (get_named_trigger_data(trigger_data)) goto enable; - if (has_hist_vars(hist_data)) - save_hist_vars(hist_data); - ret = create_actions(hist_data); if (ret) goto out_unreg; + if (has_hist_vars(hist_data) || hist_data->n_var_refs) { + if (save_hist_vars(hist_data)) + goto out_unreg; + } + ret = tracing_map_init(hist_data->map); if (ret) goto out_unreg; From 1f2a8f083575d1791b29adc4690b6a28475813fc Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Thu, 27 Apr 2023 16:06:59 +0200 Subject: [PATCH 422/509] samples: ftrace: Save required argument registers in sample trampolines commit 8564c315876ab86fcaf8e7f558d6a84cb2ce5590 upstream. The ftrace-direct-too sample traces the handle_mm_fault function whose signature changed since the introduction of the sample. Since: commit bce617edecad ("mm: do page fault accounting in handle_mm_fault") handle_mm_fault now has 4 arguments. Therefore, the sample trampoline should save 4 argument registers. s390 saves all argument registers already so it does not need a change but x86_64 needs an extra push and pop. This also evolves the signature of the tracing function to make it mirror the signature of the traced function. Link: https://lkml.kernel.org/r/20230427140700.625241-2-revest@chromium.org Cc: stable@vger.kernel.org Fixes: bce617edecad ("mm: do page fault accounting in handle_mm_fault") Reviewed-by: Steven Rostedt (Google) Reviewed-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Florent Revest Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- samples/ftrace/ftrace-direct-too.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 3927cb880d1a..4bdd67916ce4 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -4,14 +4,14 @@ #include /* for handle_mm_fault() */ #include -extern void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); +extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs); -void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs) { - trace_printk("handle mm fault vma=%p address=%lx flags=%x\n", - vma, address, flags); + trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n", + vma, address, flags, regs); } extern void my_tramp(void *); @@ -26,7 +26,9 @@ asm ( " pushq %rdi\n" " pushq %rsi\n" " pushq %rdx\n" +" pushq %rcx\n" " call my_direct_func\n" +" popq %rcx\n" " popq %rdx\n" " popq %rsi\n" " popq %rdi\n" From 1e760b2d18bf129b3da052c2946c02758e97d15e Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 10 Jul 2023 18:36:21 -0700 Subject: [PATCH 423/509] net: ena: fix shift-out-of-bounds in exponential backoff commit 1e9cb763e9bacf0c932aa948f50dcfca6f519a26 upstream. The ENA adapters on our instances occasionally reset. Once recently logged a UBSAN failure to console in the process: UBSAN: shift-out-of-bounds in build/linux/drivers/net/ethernet/amazon/ena/ena_com.c:540:13 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 28 PID: 70012 Comm: kworker/u72:2 Kdump: loaded not tainted 5.15.117 Hardware name: Amazon EC2 c5d.9xlarge/, BIOS 1.0 10/16/2017 Workqueue: ena ena_fw_reset_device [ena] Call Trace: dump_stack_lvl+0x4a/0x63 dump_stack+0x10/0x16 ubsan_epilogue+0x9/0x36 __ubsan_handle_shift_out_of_bounds.cold+0x61/0x10e ? __const_udelay+0x43/0x50 ena_delay_exponential_backoff_us.cold+0x16/0x1e [ena] wait_for_reset_state+0x54/0xa0 [ena] ena_com_dev_reset+0xc8/0x110 [ena] ena_down+0x3fe/0x480 [ena] ena_destroy_device+0xeb/0xf0 [ena] ena_fw_reset_device+0x30/0x50 [ena] process_one_work+0x22b/0x3d0 worker_thread+0x4d/0x3f0 ? process_one_work+0x3d0/0x3d0 kthread+0x12a/0x150 ? set_kthread_struct+0x50/0x50 ret_from_fork+0x22/0x30 Apparently, the reset delays are getting so large they can trigger a UBSAN panic. Looking at the code, the current timeout is capped at 5000us. Using a base value of 100us, the current code will overflow after (1<<29). Even at values before 32, this function wraps around, perhaps unintentionally. Cap the value of the exponent used for this backoff at (1<<16) which is larger than currently necessary, but large enough to support bigger values in the future. Cc: stable@vger.kernel.org Fixes: 4bb7f4cf60e3 ("net: ena: reduce driver load time") Signed-off-by: Krister Johansen Reviewed-by: Leon Romanovsky Reviewed-by: Shay Agroskin Link: https://lore.kernel.org/r/20230711013621.GE1926@templeofstupid.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 5f8769aa469d..d59ea5148c16 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -35,6 +35,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_MAX_BACKOFF_DELAY_EXP 16U + #define ENA_MIN_ADMIN_POLL_US 100 #define ENA_MAX_ADMIN_POLL_US 5000 @@ -522,6 +524,7 @@ static int ena_com_comp_status_to_errno(u8 comp_status) static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us) { + exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP); delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us); delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US); usleep_range(delay_us, 2 * delay_us); From 5e68f1f3a20fe9b6bde018e353269fbfa289609c Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Sun, 9 Jul 2023 06:51:44 +0800 Subject: [PATCH 424/509] ring-buffer: Fix deadloop issue on reading trace_pipe commit 7e42907f3a7b4ce3a2d1757f6d78336984daf8f5 upstream. Soft lockup occurs when reading file 'trace_pipe': watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [cat:4488] [...] RIP: 0010:ring_buffer_empty_cpu+0xed/0x170 RSP: 0018:ffff88810dd6fc48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000246 RCX: ffffffff93d1aaeb RDX: ffff88810a280040 RSI: 0000000000000008 RDI: ffff88811164b218 RBP: ffff88811164b218 R08: 0000000000000000 R09: ffff88815156600f R10: ffffed102a2acc01 R11: 0000000000000001 R12: 0000000051651901 R13: 0000000000000000 R14: ffff888115e49500 R15: 0000000000000000 [...] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8d853c2000 CR3: 000000010dcd8000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __find_next_entry+0x1a8/0x4b0 ? peek_next_entry+0x250/0x250 ? down_write+0xa5/0x120 ? down_write_killable+0x130/0x130 trace_find_next_entry_inc+0x3b/0x1d0 tracing_read_pipe+0x423/0xae0 ? tracing_splice_read_pipe+0xcb0/0xcb0 vfs_read+0x16b/0x490 ksys_read+0x105/0x210 ? __ia32_sys_pwrite64+0x200/0x200 ? switch_fpu_return+0x108/0x220 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Through the vmcore, I found it's because in tracing_read_pipe(), ring_buffer_empty_cpu() found some buffer is not empty but then it cannot read anything due to "rb_num_of_entries() == 0" always true, Then it infinitely loop the procedure due to user buffer not been filled, see following code path: tracing_read_pipe() { ... ... waitagain: tracing_wait_pipe() // 1. find non-empty buffer here trace_find_next_entry_inc() // 2. loop here try to find an entry __find_next_entry() ring_buffer_empty_cpu(); // 3. find non-empty buffer peek_next_entry() // 4. but peek always return NULL ring_buffer_peek() rb_buffer_peek() rb_get_reader_page() // 5. because rb_num_of_entries() == 0 always true here // then return NULL // 6. user buffer not been filled so goto 'waitgain' // and eventually leads to an deadloop in kernel!!! } By some analyzing, I found that when resetting ringbuffer, the 'entries' of its pages are not all cleared (see rb_reset_cpu()). Then when reducing the ringbuffer, and if some reduced pages exist dirty 'entries' data, they will be added into 'cpu_buffer->overrun' (see rb_remove_pages()), which cause wrong 'overrun' count and eventually cause the deadloop issue. To fix it, we need to clear every pages in rb_reset_cpu(). Link: https://lore.kernel.org/linux-trace-kernel/20230708225144.3785600-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: a5fb833172eca ("ring-buffer: Fix uninitialized read_stamp") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f08904914166..593e446f6c48 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4954,28 +4954,34 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_size); +static void rb_clear_buffer_page(struct buffer_page *page) +{ + local_set(&page->write, 0); + local_set(&page->entries, 0); + rb_init_page(page->page); + page->read = 0; +} + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { + struct buffer_page *page; + rb_head_page_deactivate(cpu_buffer); cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); - local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->entries, 0); - local_set(&cpu_buffer->head_page->page->commit, 0); - - cpu_buffer->head_page->read = 0; + rb_clear_buffer_page(cpu_buffer->head_page); + list_for_each_entry(page, cpu_buffer->pages, list) { + rb_clear_buffer_page(page); + } cpu_buffer->tail_page = cpu_buffer->head_page; cpu_buffer->commit_page = cpu_buffer->head_page; INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); - local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->entries, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); - cpu_buffer->reader_page->read = 0; + rb_clear_buffer_page(cpu_buffer->reader_page); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); From 81fb8a58d4ec10c489ee87b315bca5fbcbf155bc Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 3 Jul 2023 11:01:42 -0700 Subject: [PATCH 425/509] xtensa: ISS: fix call to split_if_spec commit bc8d5916541fa19ca5bc598eb51a5f78eb891a36 upstream. split_if_spec expects a NULL-pointer as an end marker for the argument list, but tuntap_probe never supplied that terminating NULL. As a result incorrectly formatted interface specification string may cause a crash because of the random memory access. Fix that by adding NULL terminator to the split_if_spec argument list. Cc: stable@vger.kernel.org Fixes: 7282bee78798 ("[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 8") Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/iss/network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 08d70c868c13..1270de83435e 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -231,7 +231,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init) init += sizeof(TRANSPORT_TUNTAP_NAME) - 1; if (*init == ',') { - rem = split_if_spec(init + 1, &mac_str, &dev_name); + rem = split_if_spec(init + 1, &mac_str, &dev_name, NULL); if (rem != NULL) { pr_err("%s: extra garbage on specification : '%s'\n", dev->name, rem); From 7060e5aac6dc195124c106f49106d653a416323a Mon Sep 17 00:00:00 2001 From: Mateusz Stachyra Date: Tue, 4 Jul 2023 12:27:06 +0200 Subject: [PATCH 426/509] tracing: Fix null pointer dereference in tracing_err_log_open() commit 02b0095e2fbbc060560c1065f86a211d91e27b26 upstream. Fix an issue in function 'tracing_err_log_open'. The function doesn't call 'seq_open' if the file is opened only with write permissions, which results in 'file->private_data' being left as null. If we then use 'lseek' on that opened file, 'seq_lseek' dereferences 'file->private_data' in 'mutex_lock(&m->lock)', resulting in a kernel panic. Writing to this node requires root privileges, therefore this bug has very little security impact. Tracefs node: /sys/kernel/tracing/error_log Example Kernel panic: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 Call trace: mutex_lock+0x30/0x110 seq_lseek+0x34/0xb8 __arm64_sys_lseek+0x6c/0xb8 invoke_syscall+0x58/0x13c el0_svc_common+0xc4/0x10c do_el0_svc+0x24/0x98 el0_svc+0x24/0x88 el0t_64_sync_handler+0x84/0xe4 el0t_64_sync+0x1b4/0x1b8 Code: d503201f aa0803e0 aa1f03e1 aa0103e9 (c8e97d02) ---[ end trace 561d1b49c12cf8a5 ]--- Kernel panic - not syncing: Oops: Fatal exception Link: https://lore.kernel.org/linux-trace-kernel/20230703155237eucms1p4dfb6a19caa14c79eb6c823d127b39024@eucms1p4 Link: https://lore.kernel.org/linux-trace-kernel/20230704102706eucms1p30d7ecdcc287f46ad67679fc8491b2e0f@eucms1p3 Cc: stable@vger.kernel.org Fixes: 8a062902be725 ("tracing: Add tracing error log") Signed-off-by: Mateusz Stachyra Suggested-by: Steven Rostedt Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 70526400e05c..4dd8c16f023b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7529,7 +7529,7 @@ static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = tracing_lseek, .release = tracing_err_log_release, }; From 6ea2a408d3e3cb8a97700d72f1733d95d465e50a Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 11 Jul 2023 23:15:38 +0900 Subject: [PATCH 427/509] tracing/probes: Fix not to count error code to total length commit b41326b5e0f82e93592c4366359917b5d67b529f upstream. Fix not to count the error code (which is minus value) to the total used length of array, because it can mess up the return code of process_fetch_insn_bottom(). Also clear the 'ret' value because it will be used for calculating next data_loc entry. Link: https://lore.kernel.org/all/168908493827.123124.2175257289106364229.stgit@devnote2/ Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/8819b154-2ba1-43c3-98a2-cbde20892023@moroto.mountain/ Fixes: 9b960a38835f ("tracing: probeevent: Unify fetch_insn processing common part") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_probe_tmpl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index e5282828f4a6..29348874ebde 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -143,6 +143,8 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, array: /* the last stage: Loop on array */ if (code->op == FETCH_OP_LP_ARRAY) { + if (ret < 0) + ret = 0; total += ret; if (++i < code->param) { code = s3; From a9fe97fb7b4ee21bffb76f2acb05769bad27ae70 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:38 -0700 Subject: [PATCH 428/509] scsi: qla2xxx: Wait for io return on terminate rport commit fc0cba0c7be8261a1625098bd1d695077ec621c9 upstream. System crash due to use after free. Current code allows terminate_rport_io to exit before making sure all IOs has returned. For FCP-2 device, IO's can hang on in HW because driver has not tear down the session in FW at first sign of cable pull. When dev_loss_tmo timer pops, terminate_rport_io is called and upper layer is about to free various resources. Terminate_rport_io trigger qla to do the final cleanup, but the cleanup might not be fast enough where it leave qla still holding on to the same resource. Wait for IO's to return to upper layer before resources are freed. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 61b9dc511d90..12e27ee8c5c7 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2698,6 +2698,7 @@ static void qla2x00_terminate_rport_io(struct fc_rport *rport) { fc_port_t *fcport = *(fc_port_t **)rport->dd_data; + scsi_qla_host_t *vha; if (!fcport) return; @@ -2707,9 +2708,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags)) return; + vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } /* @@ -2724,6 +2728,15 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) else qla2x00_port_logout(fcport->vha, fcport); } + + /* check for any straggling io left behind */ + if (qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x300b, + "IO not return. Resetting. \n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } } static int From bcd773969a87d9802053c0db5be84abd6594a024 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Jun 2023 17:08:36 +0530 Subject: [PATCH 429/509] scsi: qla2xxx: Array index may go out of bound commit d721b591b95cf3f290f8a7cbe90aa2ee0368388d upstream. Klocwork reports array 'vha->host_str' of size 16 may use index value(s) 16..19. Use snprintf() instead of sprintf(). Cc: stable@vger.kernel.org Co-developed-by: Bikash Hazarika Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 38b8ff87ec0a..cbc5af26303a 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4877,7 +4877,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, } INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn); - sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no); + snprintf(vha->host_str, sizeof(vha->host_str), "%s_%lu", + QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, "Allocated the host=%p hw=%p vha=%p dev_name=%s", vha->host, vha->hw, vha, From eecb8a491c824a9376155d26ec95b6d0054c059c Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 7 Jun 2023 17:08:40 +0530 Subject: [PATCH 430/509] scsi: qla2xxx: Fix buffer overrun commit b68710a8094fdffe8dd4f7a82c82649f479bb453 upstream. Klocwork warning: Buffer Overflow - Array Index Out of Bounds Driver uses fc_els_flogi to calculate size of buffer. The actual buffer is nested inside of fc_els_flogi which is smaller. Replace structure name to allow proper size calculation. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 422ff67038d1..3d1a53ba86ac 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5107,7 +5107,7 @@ static void qla_get_login_template(scsi_qla_host_t *vha) __be32 *q; memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct fc_els_flogi), ha->init_cb_size); + sz = min_t(int, sizeof(struct fc_els_csp), ha->init_cb_size); rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, ha->init_cb, sz); if (rval != QLA_SUCCESS) { From 2bea9c1c983152c5411f5a2f1113cb790ce1389d Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Wed, 7 Jun 2023 17:08:37 +0530 Subject: [PATCH 431/509] scsi: qla2xxx: Fix potential NULL pointer dereference commit 464ea494a40c6e3e0e8f91dd325408aaf21515ba upstream. Klocwork tool reported 'cur_dsd' may be dereferenced. Add fix to validate pointer before dereferencing the pointer. Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index f0af76c3de7e..54fc0afbc02a 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -601,7 +601,8 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt, put_unaligned_le32(COMMAND_TYPE_6, &cmd_pkt->entry_type); /* No data transfer */ - if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE || + tot_dsds == 0) { cmd_pkt->byte_count = cpu_to_le32(0); return 0; } From 921d6844625527a92d1178262a633cc88a8e61bd Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Jun 2023 17:08:39 +0530 Subject: [PATCH 432/509] scsi: qla2xxx: Check valid rport returned by fc_bsg_to_rport() commit af73f23a27206ffb3c477cac75b5fcf03410556e upstream. Klocwork reported warning of rport maybe NULL and will be dereferenced. rport returned by call to fc_bsg_to_rport() could be NULL and dereferenced. Check valid rport returned by fc_bsg_to_rport(). Cc: stable@vger.kernel.org Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_bsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 1fd292a6ac88..d5e96523ba12 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -268,6 +268,10 @@ qla2x00_process_els(struct bsg_job *bsg_job) if (bsg_request->msgcode == FC_BSG_RPT_ELS) { rport = fc_bsg_to_rport(bsg_job); + if (!rport) { + rval = -ENOMEM; + goto done; + } fcport = *(fc_port_t **) rport->dd_data; host = rport_to_shost(rport); vha = shost_priv(host); From e8de73238d5d5db64b94474023adc26223088a3f Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Wed, 7 Jun 2023 17:08:42 +0530 Subject: [PATCH 433/509] scsi: qla2xxx: Correct the index of array commit b1b9d3825df4c757d653d0b1df66f084835db9c3 upstream. Klocwork reported array 'port_dstate_str' of size 10 may use index value(s) 10..15. Add a fix to correct the index of array. Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-8-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_inline.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index e80e41b6c9e1..7e8b59a0954b 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -109,11 +109,13 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state) { int old_val; uint8_t shiftbits, mask; + uint8_t port_dstate_str_sz; /* This will have to change when the max no. of states > 16 */ shiftbits = 4; mask = (1 << shiftbits) - 1; + port_dstate_str_sz = sizeof(port_dstate_str) / sizeof(char *); fcport->disc_state = state; while (1) { old_val = atomic_read(&fcport->shadow_disc_state); @@ -121,7 +123,8 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state) old_val, (old_val << shiftbits) | state)) { ql_dbg(ql_dbg_disc, fcport->vha, 0x2134, "FCPort %8phC disc_state transition: %s to %s - portid=%06x.\n", - fcport->port_name, port_dstate_str[old_val & mask], + fcport->port_name, (old_val & mask) < port_dstate_str_sz ? + port_dstate_str[old_val & mask] : "Unknown", port_dstate_str[state], fcport->d_id.b24); return; } From 5addd62586a94a572359418464ce0ae12fa46187 Mon Sep 17 00:00:00 2001 From: Shreyas Deodhar Date: Wed, 7 Jun 2023 17:08:41 +0530 Subject: [PATCH 434/509] scsi: qla2xxx: Pointer may be dereferenced commit 00eca15319d9ce8c31cdf22f32a3467775423df4 upstream. Klocwork tool reported pointer 'rport' returned from call to function fc_bsg_to_rport() may be NULL and will be dereferenced. Add a fix to validate rport before dereferencing. Cc: stable@vger.kernel.org Signed-off-by: Shreyas Deodhar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_bsg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index d5e96523ba12..804cac4c3476 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -2545,6 +2545,8 @@ qla24xx_bsg_request(struct bsg_job *bsg_job) if (bsg_request->msgcode == FC_BSG_RPT_ELS) { rport = fc_bsg_to_rport(bsg_job); + if (!rport) + return ret; host = rport_to_shost(rport); vha = shost_priv(host); } else { From 0b1ce92fabdb7d02ddf8641230a06e2752ae5baa Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Thu, 15 Jun 2023 13:16:33 +0530 Subject: [PATCH 435/509] scsi: qla2xxx: Remove unused nvme_ls_waitq wait queue commit 20fce500b232b970e40312a9c97e7f3b6d7a709c upstream. System crash when qla2x00_start_sp(sp) returns error code EGAIN and wake_up gets called for uninitialized wait queue sp->nvme_ls_waitq. qla2xxx [0000:37:00.1]-2121:5: Returning existing qpair of ffff8ae2c0513400 for idx=0 qla2xxx [0000:37:00.1]-700e:5: qla2x00_start_sp failed = 11 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] RIP: 0010:__wake_up_common+0x4c/0x190 RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __wake_up_common_lock+0x7c/0xc0 qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] Remove unused nvme_ls_waitq wait queue. nvme_ls_waitq logic was removed previously in the commits tagged Fixed: below. Fixes: 219d27d7147e ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands") Fixes: 5621b0dd7453 ("scsi: qla2xxx: Simpify unregistration of FC-NVMe local/remote ports") Cc: stable@vger.kernel.org Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230615074633.12721-1-njavali@marvell.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_nvme.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 6afce455b9d8..06b0ad2b51bb 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -639,7 +639,6 @@ typedef struct srb { struct iocb_resource iores; struct kref cmd_kref; /* need to migrate ref_count over to this */ void *priv; - wait_queue_head_t nvme_ls_waitq; struct fc_port *fcport; struct scsi_qla_host *vha; unsigned int start_timer:1; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 695dd89be330..8b0c8f9bdef0 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -331,7 +331,6 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; qla2x00_rel_sp(sp); @@ -590,7 +589,6 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, if (!sp) return -EBUSY; - init_waitqueue_head(&sp->nvme_ls_waitq); kref_init(&sp->cmd_kref); spin_lock_init(&priv->cmd_lock); sp->priv = priv; @@ -608,7 +606,6 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x212d, "qla2x00_start_nvme_mq failed = %d\n", rval); - wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; qla2xxx_rel_qpair_sp(sp->qpair, sp); From 73e72a5380a27ec5cd15a03d02ff881b0777a605 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 11 Jul 2023 18:01:00 -0300 Subject: [PATCH 436/509] net/sched: sch_qfq: reintroduce lmax bound check for MTU commit 158810b261d02fc7dd92ca9c392d8f8a211a2401 upstream. 25369891fcef deletes a check for the case where no 'lmax' is specified which 3037933448f6 previously fixed as 'lmax' could be set to the device's MTU without any bound checking for QFQ_LMAX_MIN and QFQ_LMAX_MAX. Therefore, reintroduce the check. Fixes: 25369891fcef ("net/sched: sch_qfq: refactor parsing of netlink parameters") Acked-by: Jamal Hadi Salim Reviewed-by: Eric Dumazet Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 616d1798cfef..d5a1e4b237b1 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -428,10 +428,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, else weight = 1; - if (tb[TCA_QFQ_LMAX]) + if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - else + } else { + /* MTU size is user controlled */ lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { + NL_SET_ERR_MSG_MOD(extack, + "MTU size out of bounds for qfq"); + return -EINVAL; + } + } inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; From 9a085fa9b7d644a234465091e038c1911e1a4f2a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 16 Sep 2021 15:34:46 -0300 Subject: [PATCH 437/509] RDMA/cma: Ensure rdma_addr_cancel() happens before issuing more requests commit 305d568b72f17f674155a2a8275f865f207b3808 upstream. The FSM can run in a circle allowing rdma_resolve_ip() to be called twice on the same id_priv. While this cannot happen without going through the work, it violates the invariant that the same address resolution background request cannot be active twice. CPU 1 CPU 2 rdma_resolve_addr(): RDMA_CM_IDLE -> RDMA_CM_ADDR_QUERY rdma_resolve_ip(addr_handler) #1 process_one_req(): for #1 addr_handler(): RDMA_CM_ADDR_QUERY -> RDMA_CM_ADDR_BOUND mutex_unlock(&id_priv->handler_mutex); [.. handler still running ..] rdma_resolve_addr(): RDMA_CM_ADDR_BOUND -> RDMA_CM_ADDR_QUERY rdma_resolve_ip(addr_handler) !! two requests are now on the req_list rdma_destroy_id(): destroy_id_handler_unlock(): _destroy_id(): cma_cancel_operation(): rdma_addr_cancel() // process_one_req() self removes it spin_lock_bh(&lock); cancel_delayed_work(&req->work); if (!list_empty(&req->list)) == true ! rdma_addr_cancel() returns after process_on_req #1 is done kfree(id_priv) process_one_req(): for #2 addr_handler(): mutex_lock(&id_priv->handler_mutex); !! Use after free on id_priv rdma_addr_cancel() expects there to be one req on the list and only cancels the first one. The self-removal behavior of the work only happens after the handler has returned. This yields a situations where the req_list can have two reqs for the same "handle" but rdma_addr_cancel() only cancels the first one. The second req remains active beyond rdma_destroy_id() and will use-after-free id_priv once it inevitably triggers. Fix this by remembering if the id_priv has called rdma_resolve_ip() and always cancel before calling it again. This ensures the req_list never gets more than one item in it and doesn't cost anything in the normal flow that never uses this strange error path. Link: https://lore.kernel.org/r/0-v1-3bc675b8006d+22-syz_cancel_uaf_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: e51060f08a61 ("IB: IP address based RDMA connection manager") Reported-by: syzbot+dc3dfba010d7671e05f5@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe Signed-off-by: Anton Gusev Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 23 +++++++++++++++++++++++ drivers/infiniband/core/cma_priv.h | 1 + 2 files changed, 24 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index db24f7dfa00f..805678f6fe57 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1792,6 +1792,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, { switch (state) { case RDMA_CM_ADDR_QUERY: + /* + * We can avoid doing the rdma_addr_cancel() based on state, + * only RDMA_CM_ADDR_QUERY has a work that could still execute. + * Notice that the addr_handler work could still be exiting + * outside this state, however due to the interaction with the + * handler_mutex the work is guaranteed not to touch id_priv + * during exit. + */ rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); break; case RDMA_CM_ROUTE_QUERY: @@ -3401,6 +3409,21 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (dst_addr->sa_family == AF_IB) { ret = cma_resolve_ib_addr(id_priv); } else { + /* + * The FSM can return back to RDMA_CM_ADDR_BOUND after + * rdma_resolve_ip() is called, eg through the error + * path in addr_handler(). If this happens the existing + * request must be canceled before issuing a new one. + * Since canceling a request is a bit slow and this + * oddball path is rare, keep track once a request has + * been issued. The track turns out to be a permanent + * state since this is the only cancel as it is + * immediately before rdma_resolve_ip(). + */ + if (id_priv->used_resolve_ip) + rdma_addr_cancel(&id->route.addr.dev_addr); + else + id_priv->used_resolve_ip = 1; ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h index caece96ebcf5..b53f4fa5e3fb 100644 --- a/drivers/infiniband/core/cma_priv.h +++ b/drivers/infiniband/core/cma_priv.h @@ -89,6 +89,7 @@ struct rdma_id_private { u8 reuseaddr; u8 afonly; u8 timeout; + u8 used_resolve_ip; enum ib_gid_type gid_type; /* From f09c0ac142c59495262dd80545f261b2aeeba538 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 21 Jul 2023 15:58:38 +0200 Subject: [PATCH 438/509] drm/atomic: Fix potential use-after-free in nonblocking commits commit 4e076c73e4f6e90816b30fcd4a0d7ab365087255 upstream. This requires a bit of background. Properly done a modeset driver's unload/remove sequence should be drm_dev_unplug(); drm_atomic_helper_shutdown(); drm_dev_put(); The trouble is that the drm_dev_unplugged() checks are by design racy, they do not synchronize against all outstanding ioctl. This is because those ioctl could block forever (both for modeset and for driver specific ioctls), leading to deadlocks in hotunplug. Instead the code sections that touch the hardware need to be annotated with drm_dev_enter/exit, to avoid accessing hardware resources after the unload/remove has finished. To avoid use-after-free issues all the involved userspace visible objects are supposed to hold a reference on the underlying drm_device, like drm_file does. The issue now is that we missed one, the atomic modeset ioctl can be run in a nonblocking fashion, and in that case it cannot rely on the implied drm_device reference provided by the ioctl calling context. This can result in a use-after-free if an nonblocking atomic commit is carefully raced against a driver unload. Fix this by unconditionally grabbing a drm_device reference for any drm_atomic_state structures. Strictly speaking this isn't required for blocking commits and TEST_ONLY calls, but it's the simpler approach. Thanks to shanzhulig for the initial idea of grabbing an unconditional reference, I just added comments, a condensed commit message and fixed a minor potential issue in where exactly we drop the final reference. Reported-by: shanzhulig Suggested-by: shanzhulig Reviewed-by: Maxime Ripard Cc: Maarten Lankhorst Cc: Thomas Zimmermann Cc: David Airlie Cc: stable@kernel.org Signed-off-by: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 98b659981f1a..b10ba5057735 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -98,6 +98,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) if (!state->planes) goto fail; + /* + * Because drm_atomic_state can be committed asynchronously we need our + * own reference and cannot rely on the on implied by drm_file in the + * ioctl call. + */ + drm_dev_get(dev); state->dev = dev; DRM_DEBUG_ATOMIC("Allocated atomic state %p\n", state); @@ -257,7 +263,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear); void __drm_atomic_state_free(struct kref *ref) { struct drm_atomic_state *state = container_of(ref, typeof(*state), ref); - struct drm_mode_config *config = &state->dev->mode_config; + struct drm_device *dev = state->dev; + struct drm_mode_config *config = &dev->mode_config; drm_atomic_state_clear(state); @@ -269,6 +276,8 @@ void __drm_atomic_state_free(struct kref *ref) drm_atomic_state_default_release(state); kfree(state); } + + drm_dev_put(dev); } EXPORT_SYMBOL(__drm_atomic_state_free); From ce2a7e7b504c9adaac52b51186acb3b9b18022a8 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Thu, 13 Jul 2023 15:57:13 +0800 Subject: [PATCH 439/509] ALSA: hda/realtek - remove 3k pull low procedure commit 69ea4c9d02b7947cdd612335a61cc1a02e544ccd upstream. This was the ALC283 depop procedure. Maybe this procedure wasn't suitable with new codec. So, let us remove it. But HP 15z-fc000 must do 3k pull low. If it reboot with plugged headset, it will have errors show don't find codec error messages. Run 3k pull low will solve issues. So, let AMD chipset will run this for workarround. Fixes: 5aec98913095 ("ALSA: hda/realtek - ALC236 headset MIC recording issue") Signed-off-by: Kailang Yang Cc: Reported-by: Joseph C. Sible Closes: https://lore.kernel.org/r/CABpewhE4REgn9RJZduuEU6Z_ijXNeQWnrxO1tg70Gkw=F8qNYg@mail.gmail.com/ Link: https://lore.kernel.org/r/4678992299664babac4403d9978e7ba7@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7d6a36b647a7..244278fb8067 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -121,6 +121,7 @@ struct alc_spec { unsigned int ultra_low_power:1; unsigned int has_hs_key:1; unsigned int no_internal_mic_pin:1; + unsigned int en_3kpull_low:1; /* for PLL fix */ hda_nid_t pll_nid; @@ -3617,6 +3618,7 @@ static void alc256_shutup(struct hda_codec *codec) if (!hp_pin) hp_pin = 0x21; + alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); if (hp_pin_sense) @@ -3633,8 +3635,7 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. So skip setting it for the codec alc257 */ - if (codec->core.vendor_id != 0x10ec0236 && - codec->core.vendor_id != 0x10ec0257) + if (spec->en_3kpull_low) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) @@ -10065,6 +10066,8 @@ static int patch_alc269(struct hda_codec *codec) spec->shutup = alc256_shutup; spec->init_hook = alc256_init; spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */ + if (codec->bus->pci->vendor == PCI_VENDOR_ID_AMD) + spec->en_3kpull_low = true; break; case 0x10ec0257: spec->codec_variant = ALC269_TYPE_ALC257; From a26208e184ae30cd1a0f256f92e53d53834a2eb5 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Tue, 18 Jul 2023 18:12:41 +0200 Subject: [PATCH 440/509] ALSA: hda/realtek: Enable Mute LED on HP Laptop 15s-eq2xxx commit 0659400f18c0e6c0c69d74fe5d09e7f6fbbd52a2 upstream. The HP Laptop 15s-eq2xxx uses ALC236 codec and controls the mute LED using COEF 0x07 index 1. No existing quirk covers this configuration. Adds a new quirk and enables it for the device. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20230718161241.393181-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 244278fb8067..6bfc7e28515a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4559,6 +4559,21 @@ static void alc236_fixup_hp_mute_led_coefbit(struct hda_codec *codec, } } +static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->mute_led_polarity = 0; + spec->mute_led_coef.idx = 0x07; + spec->mute_led_coef.mask = 1; + spec->mute_led_coef.on = 1; + spec->mute_led_coef.off = 0; + snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set); + } +} + /* turn on/off mic-mute LED per capture hook by coef bit */ static int coef_micmute_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) @@ -6878,6 +6893,7 @@ enum { ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED, + ALC236_FIXUP_HP_MUTE_LED_COEFBIT2, ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, @@ -8250,6 +8266,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360_mute_led, }, + [ALC236_FIXUP_HP_MUTE_LED_COEFBIT2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_mute_led_coefbit2, + }, [ALC236_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_gpio_led, @@ -9004,6 +9024,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED), SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), From 0a6b0ca58685be34979236f83f2b322635b80b32 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Thu, 23 Mar 2023 14:04:12 +0100 Subject: [PATCH 441/509] keys: Fix linking a duplicate key to a keyring's assoc_array commit d55901522f96082a43b9842d34867363c0cdbac5 upstream. When making a DNS query inside the kernel using dns_query(), the request code can in rare cases end up creating a duplicate index key in the assoc_array of the destination keyring. It is eventually found by a BUG_ON() check in the assoc_array implementation and results in a crash. Example report: [2158499.700025] kernel BUG at ../lib/assoc_array.c:652! [2158499.700039] invalid opcode: 0000 [#1] SMP PTI [2158499.700065] CPU: 3 PID: 31985 Comm: kworker/3:1 Kdump: loaded Not tainted 5.3.18-150300.59.90-default #1 SLE15-SP3 [2158499.700096] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 [2158499.700351] Workqueue: cifsiod cifs_resolve_server [cifs] [2158499.700380] RIP: 0010:assoc_array_insert+0x85f/0xa40 [2158499.700401] Code: ff 74 2b 48 8b 3b 49 8b 45 18 4c 89 e6 48 83 e7 fe e8 95 ec 74 00 3b 45 88 7d db 85 c0 79 d4 0f 0b 0f 0b 0f 0b e8 41 f2 be ff <0f> 0b 0f 0b 81 7d 88 ff ff ff 7f 4c 89 eb 4c 8b ad 58 ff ff ff 0f [2158499.700448] RSP: 0018:ffffc0bd6187faf0 EFLAGS: 00010282 [2158499.700470] RAX: ffff9f1ea7da2fe8 RBX: ffff9f1ea7da2fc1 RCX: 0000000000000005 [2158499.700492] RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 [2158499.700515] RBP: ffffc0bd6187fbb0 R08: ffff9f185faf1100 R09: 0000000000000000 [2158499.700538] R10: ffff9f1ea7da2cc0 R11: 000000005ed8cec8 R12: ffffc0bd6187fc28 [2158499.700561] R13: ffff9f15feb8d000 R14: ffff9f1ea7da2fc0 R15: ffff9f168dc0d740 [2158499.700585] FS: 0000000000000000(0000) GS:ffff9f185fac0000(0000) knlGS:0000000000000000 [2158499.700610] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [2158499.700630] CR2: 00007fdd94fca238 CR3: 0000000809d8c006 CR4: 00000000003706e0 [2158499.700702] Call Trace: [2158499.700741] ? key_alloc+0x447/0x4b0 [2158499.700768] ? __key_link_begin+0x43/0xa0 [2158499.700790] __key_link_begin+0x43/0xa0 [2158499.700814] request_key_and_link+0x2c7/0x730 [2158499.700847] ? dns_resolver_read+0x20/0x20 [dns_resolver] [2158499.700873] ? key_default_cmp+0x20/0x20 [2158499.700898] request_key_tag+0x43/0xa0 [2158499.700926] dns_query+0x114/0x2ca [dns_resolver] [2158499.701127] dns_resolve_server_name_to_ip+0x194/0x310 [cifs] [2158499.701164] ? scnprintf+0x49/0x90 [2158499.701190] ? __switch_to_asm+0x40/0x70 [2158499.701211] ? __switch_to_asm+0x34/0x70 [2158499.701405] reconn_set_ipaddr_from_hostname+0x81/0x2a0 [cifs] [2158499.701603] cifs_resolve_server+0x4b/0xd0 [cifs] [2158499.701632] process_one_work+0x1f8/0x3e0 [2158499.701658] worker_thread+0x2d/0x3f0 [2158499.701682] ? process_one_work+0x3e0/0x3e0 [2158499.701703] kthread+0x10d/0x130 [2158499.701723] ? kthread_park+0xb0/0xb0 [2158499.701746] ret_from_fork+0x1f/0x40 The situation occurs as follows: * Some kernel facility invokes dns_query() to resolve a hostname, for example, "abcdef". The function registers its global DNS resolver cache as current->cred.thread_keyring and passes the query to request_key_net() -> request_key_tag() -> request_key_and_link(). * Function request_key_and_link() creates a keyring_search_context object. Its match_data.cmp method gets set via a call to type->match_preparse() (resolves to dns_resolver_match_preparse()) to dns_resolver_cmp(). * Function request_key_and_link() continues and invokes search_process_keyrings_rcu() which returns that a given key was not found. The control is then passed to request_key_and_link() -> construct_alloc_key(). * Concurrently to that, a second task similarly makes a DNS query for "abcdef." and its result gets inserted into the DNS resolver cache. * Back on the first task, function construct_alloc_key() first runs __key_link_begin() to determine an assoc_array_edit operation to insert a new key. Index keys in the array are compared exactly as-is, using keyring_compare_object(). The operation finds that "abcdef" is not yet present in the destination keyring. * Function construct_alloc_key() continues and checks if a given key is already present on some keyring by again calling search_process_keyrings_rcu(). This search is done using dns_resolver_cmp() and "abcdef" gets matched with now present key "abcdef.". * The found key is linked on the destination keyring by calling __key_link() and using the previously calculated assoc_array_edit operation. This inserts the "abcdef." key in the array but creates a duplicity because the same index key is already present. Fix the problem by postponing __key_link_begin() in construct_alloc_key() until an actual key which should be linked into the destination keyring is determined. [jarkko@kernel.org: added a fixes tag and cc to stable] Cc: stable@vger.kernel.org # v5.3+ Fixes: df593ee23e05 ("keys: Hoist locking out of __key_link_begin()") Signed-off-by: Petr Pavlu Reviewed-by: Joey Lee Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- security/keys/request_key.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 07a0ef2baacd..a7673ad86d18 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -401,17 +401,21 @@ static int construct_alloc_key(struct keyring_search_context *ctx, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { - ret = __key_link_lock(dest_keyring, &ctx->index_key); + ret = __key_link_lock(dest_keyring, &key->index_key); if (ret < 0) goto link_lock_failed; - ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); - if (ret < 0) - goto link_prealloc_failed; } - /* attach the key to the destination keyring under lock, but we do need + /* + * Attach the key to the destination keyring under lock, but we do need * to do another check just in case someone beat us to it whilst we - * waited for locks */ + * waited for locks. + * + * The caller might specify a comparison function which looks for keys + * that do not exactly match but are still equivalent from the caller's + * perspective. The __key_link_begin() operation must be done only after + * an actual key is determined. + */ mutex_lock(&key_construction_mutex); rcu_read_lock(); @@ -420,12 +424,16 @@ static int construct_alloc_key(struct keyring_search_context *ctx, if (!IS_ERR(key_ref)) goto key_already_present; - if (dest_keyring) + if (dest_keyring) { + ret = __key_link_begin(dest_keyring, &key->index_key, &edit); + if (ret < 0) + goto link_alloc_failed; __key_link(dest_keyring, key, &edit); + } mutex_unlock(&key_construction_mutex); if (dest_keyring) - __key_link_end(dest_keyring, &ctx->index_key, edit); + __key_link_end(dest_keyring, &key->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -438,10 +446,13 @@ static int construct_alloc_key(struct keyring_search_context *ctx, mutex_unlock(&key_construction_mutex); key = key_ref_to_ptr(key_ref); if (dest_keyring) { + ret = __key_link_begin(dest_keyring, &key->index_key, &edit); + if (ret < 0) + goto link_alloc_failed_unlocked; ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) __key_link(dest_keyring, key, &edit); - __key_link_end(dest_keyring, &ctx->index_key, edit); + __key_link_end(dest_keyring, &key->index_key, edit); if (ret < 0) goto link_check_failed; } @@ -456,8 +467,10 @@ static int construct_alloc_key(struct keyring_search_context *ctx, kleave(" = %d [linkcheck]", ret); return ret; -link_prealloc_failed: - __key_link_end(dest_keyring, &ctx->index_key, edit); +link_alloc_failed: + mutex_unlock(&key_construction_mutex); +link_alloc_failed_unlocked: + __key_link_end(dest_keyring, &key->index_key, edit); link_lock_failed: mutex_unlock(&user->cons_lock); key_put(key); From 4410f4a938ae80ffe1c788351dd3756974745a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Georg=20M=C3=BCller?= Date: Wed, 28 Jun 2023 10:45:50 +0200 Subject: [PATCH 442/509] perf probe: Add test for regression introduced by switch to die_get_decl_file() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 56cbeacf143530576905623ac72ae0964f3293a6 upstream. This patch adds a test to validate that 'perf probe' works for binaries where DWARF info is split into multiple CUs Signed-off-by: Georg MĆ¼ller Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: regressions@lists.linux.dev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230628084551.1860532-5-georgmueller@gmx.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- .../shell/test_uprobe_from_different_cu.sh | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/perf/tests/shell/test_uprobe_from_different_cu.sh diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh new file mode 100644 index 000000000000..00d2e0e2e0c2 --- /dev/null +++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# test perf probe of function from different CU +# SPDX-License-Identifier: GPL-2.0 + +set -e + +temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) + +cleanup() +{ + trap - EXIT TERM INT + if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then + echo "--- Cleaning up ---" + perf probe -x ${temp_dir}/testfile -d foo + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +} + +trap_cleanup() +{ + cleanup + exit 1 +} + +trap trap_cleanup EXIT TERM INT + +cat > ${temp_dir}/testfile-foo.h << EOF +struct t +{ + int *p; + int c; +}; + +extern int foo (int i, struct t *t); +EOF + +cat > ${temp_dir}/testfile-foo.c << EOF +#include "testfile-foo.h" + +int +foo (int i, struct t *t) +{ + int j, res = 0; + for (j = 0; j < i && j < t->c; j++) + res += t->p[j]; + + return res; +} +EOF + +cat > ${temp_dir}/testfile-main.c << EOF +#include "testfile-foo.h" + +static struct t g; + +int +main (int argc, char **argv) +{ + int i; + int j[argc]; + g.c = argc; + g.p = j; + for (i = 0; i < argc; i++) + j[i] = (int) argv[i][0]; + return foo (3, &g); +} +EOF + +gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o +gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o +gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o + +perf probe -x ${temp_dir}/testfile --funcs foo +perf probe -x ${temp_dir}/testfile foo + +cleanup From d2c667cc18314c9bad3ec86ae071c0342132aa09 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 14 Jul 2023 13:42:06 +0100 Subject: [PATCH 443/509] btrfs: fix warning when putting transaction with qgroups enabled after abort commit aa84ce8a78a1a5c10cdf9c7a5fb0c999fbc2c8d6 upstream. If we have a transaction abort with qgroups enabled we get a warning triggered when doing the final put on the transaction, like this: [552.6789] ------------[ cut here ]------------ [552.6815] WARNING: CPU: 4 PID: 81745 at fs/btrfs/transaction.c:144 btrfs_put_transaction+0x123/0x130 [btrfs] [552.6817] Modules linked in: btrfs blake2b_generic xor (...) [552.6819] CPU: 4 PID: 81745 Comm: btrfs-transacti Tainted: G W 6.4.0-rc6-btrfs-next-134+ #1 [552.6819] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-0-gea1b7a073390-prebuilt.qemu.org 04/01/2014 [552.6819] RIP: 0010:btrfs_put_transaction+0x123/0x130 [btrfs] [552.6821] Code: bd a0 01 00 (...) [552.6821] RSP: 0018:ffffa168c0527e28 EFLAGS: 00010286 [552.6821] RAX: ffff936042caed00 RBX: ffff93604a3eb448 RCX: 0000000000000000 [552.6821] RDX: ffff93606421b028 RSI: ffffffff92ff0878 RDI: ffff93606421b010 [552.6821] RBP: ffff93606421b000 R08: 0000000000000000 R09: ffffa168c0d07c20 [552.6821] R10: 0000000000000000 R11: ffff93608dc52950 R12: ffffa168c0527e70 [552.6821] R13: ffff93606421b000 R14: ffff93604a3eb420 R15: ffff93606421b028 [552.6821] FS: 0000000000000000(0000) GS:ffff93675fb00000(0000) knlGS:0000000000000000 [552.6821] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [552.6821] CR2: 0000558ad262b000 CR3: 000000014feda005 CR4: 0000000000370ee0 [552.6822] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [552.6822] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [552.6822] Call Trace: [552.6822] [552.6822] ? __warn+0x80/0x130 [552.6822] ? btrfs_put_transaction+0x123/0x130 [btrfs] [552.6824] ? report_bug+0x1f4/0x200 [552.6824] ? handle_bug+0x42/0x70 [552.6824] ? exc_invalid_op+0x14/0x70 [552.6824] ? asm_exc_invalid_op+0x16/0x20 [552.6824] ? btrfs_put_transaction+0x123/0x130 [btrfs] [552.6826] btrfs_cleanup_transaction+0xe7/0x5e0 [btrfs] [552.6828] ? _raw_spin_unlock_irqrestore+0x23/0x40 [552.6828] ? try_to_wake_up+0x94/0x5e0 [552.6828] ? __pfx_process_timeout+0x10/0x10 [552.6828] transaction_kthread+0x103/0x1d0 [btrfs] [552.6830] ? __pfx_transaction_kthread+0x10/0x10 [btrfs] [552.6832] kthread+0xee/0x120 [552.6832] ? __pfx_kthread+0x10/0x10 [552.6832] ret_from_fork+0x29/0x50 [552.6832] [552.6832] ---[ end trace 0000000000000000 ]--- This corresponds to this line of code: void btrfs_put_transaction(struct btrfs_transaction *transaction) { (...) WARN_ON(!RB_EMPTY_ROOT( &transaction->delayed_refs.dirty_extent_root)); (...) } The warning happens because btrfs_qgroup_destroy_extent_records(), called in the transaction abort path, we free all entries from the rbtree "dirty_extent_root" with rbtree_postorder_for_each_entry_safe(), but we don't actually empty the rbtree - it's still pointing to nodes that were freed. So set the rbtree's root node to NULL to avoid this warning (assign RB_ROOT). Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Josef Bacik Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 94edc74c8979..828a7ff4aebe 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4376,4 +4376,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) ulist_free(entry->old_roots); kfree(entry); } + *root = RB_ROOT; } From dad97c205af2ac0af419b98c2d6fa267f7ced8dd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 7 Jun 2023 17:49:20 +0200 Subject: [PATCH 444/509] fuse: revalidate: don't invalidate if interrupted commit a9d1c4c6df0e568207907c04aed9e7beb1294c42 upstream. If the LOOKUP request triggered from fuse_dentry_revalidate() is interrupted, then the dentry will be invalidated, possibly resulting in submounts being unmounted. Reported-by: Xu Rongbo Closes: https://lore.kernel.org/all/CAJfpegswN_CJJ6C3RZiaK6rpFmNyWmXfaEpnQUJ42KCwNF5tWw@mail.gmail.com/ Fixes: 9e6268db496a ("[PATCH] FUSE - read-write operations") Cc: Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e3b9b7d188e6..b0c701c007c6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -249,7 +249,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) spin_unlock(&fi->lock); } kfree(forget); - if (ret == -ENOMEM) + if (ret == -ENOMEM || ret == -EINTR) goto out; if (ret || fuse_invalid_attr(&outarg.attr) || fuse_stale_inode(inode, outarg.generation, &outarg.attr)) From 1ab5aa1846a5542be9a5af1455408930d22b347e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 13 Jul 2023 23:16:44 +0200 Subject: [PATCH 445/509] selftests: tc: set timeout to 15 minutes commit fda05798c22a354efde09a76bdfc276b2d591829 upstream. When looking for something else in LKFT reports [1], I noticed that the TC selftest ended with a timeout error: not ok 1 selftests: tc-testing: tdc.sh # TIMEOUT 45 seconds The timeout had been introduced 3 years ago, see the Fixes commit below. This timeout is only in place when executing the selftests via the kselftests runner scripts. I guess this is not what most TC devs are using and nobody noticed the issue before. The new timeout is set to 15 minutes as suggested by Pedro [2]. It looks like it is plenty more time than what it takes in "normal" conditions. Fixes: 852c8cbf34d3 ("selftests/kselftest/runner.sh: Add 45 second timeout per test") Cc: stable@vger.kernel.org Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20230711/testrun/18267241/suite/kselftest-tc-testing/test/tc-testing_tdc_sh/log [1] Link: https://lore.kernel.org/netdev/0e061d4a-9a23-9f58-3b35-d8919de332d7@tessares.net/T/ [2] Suggested-by: Pedro Tammela Signed-off-by: Matthieu Baerts Reviewed-by: Zhengchao Shao Link: https://lore.kernel.org/r/20230713-tc-selftests-lkft-v1-1-1eb4fd3a96e7@tessares.net Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/tc-testing/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/tc-testing/settings diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings new file mode 100644 index 000000000000..e2206265f67c --- /dev/null +++ b/tools/testing/selftests/tc-testing/settings @@ -0,0 +1 @@ +timeout=900 From 4935761daa339656ac2417835d54c85a99fda386 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 13 Jul 2023 23:16:45 +0200 Subject: [PATCH 446/509] selftests: tc: add 'ct' action kconfig dep commit 719b4774a8cb1a501e2d22a5a4a3a0a870e427d5 upstream. When looking for something else in LKFT reports [1], I noticed most of the tests were skipped because the "teardown stage" did not complete successfully. Pedro found out this is due to the fact CONFIG_NF_FLOW_TABLE is required but not listed in the 'config' file. Adding it to the list fixes the issues on LKFT side. CONFIG_NET_ACT_CT is now set to 'm' in the final kconfig. Fixes: c34b961a2492 ("net/sched: act_ct: Create nf flow table per zone") Cc: stable@vger.kernel.org Link: https://qa-reports.linaro.org/lkft/linux-next-master/build/next-20230711/testrun/18267241/suite/kselftest-tc-testing/test/tc-testing_tdc_sh/log [1] Link: https://lore.kernel.org/netdev/0e061d4a-9a23-9f58-3b35-d8919de332d7@tessares.net/T/ [2] Suggested-by: Pedro Tammela Signed-off-by: Matthieu Baerts Tested-by: Zhengchao Shao Link: https://lore.kernel.org/r/20230713-tc-selftests-lkft-v1-2-1eb4fd3a96e7@tessares.net Acked-by: Jamal Hadi Salim Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/tc-testing/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index b71828df5a6d..5f581c3a1131 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -5,6 +5,7 @@ CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_LABELS=y +CONFIG_NF_FLOW_TABLE=m CONFIG_NF_NAT=m CONFIG_NET_SCHED=y From 8b3dd8d23fa0cf790699244dad126591254793f2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Jul 2023 12:16:39 +0100 Subject: [PATCH 447/509] regmap: Drop initial version of maximum transfer length fixes commit bc64734825c59e18a27ac266b07e14944c111fd8 upstream. When problems were noticed with the register address not being taken into account when limiting raw transfers with I2C devices we fixed this in the core. Unfortunately it has subsequently been realised that a lot of buses were relying on the prior behaviour, partly due to unclear documentation not making it obvious what was intended in the core. This is all more involved to fix than is sensible for a fix commit so let's just drop the original fixes, a separate commit will fix the originally observed problem in an I2C specific way Fixes: 3981514180c9 ("regmap: Account for register length when chunking") Fixes: c8e796895e23 ("regmap: spi-avmm: Fix regmap_bus max_raw_write") Signed-off-by: Mark Brown Reviewed-by: Xu Yilun Cc: stable@kernel.org Link: https://lore.kernel.org/r/20230712-regmap-max-transfer-v1-1-80e2aed22e83@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-spi-avmm.c | 2 +- drivers/base/regmap/regmap.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c index 67f89937219c..ad1da83e849f 100644 --- a/drivers/base/regmap/regmap-spi-avmm.c +++ b/drivers/base/regmap/regmap-spi-avmm.c @@ -666,7 +666,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = { .reg_format_endian_default = REGMAP_ENDIAN_NATIVE, .val_format_endian_default = REGMAP_ENDIAN_NATIVE, .max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT, - .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, + .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT, .free_context = spi_avmm_bridge_ctx_free, }; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2a3c3dfefdce..55a30afc14a0 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1998,8 +1998,6 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, size_t val_count = val_len / val_bytes; size_t chunk_count, chunk_bytes; size_t chunk_regs = val_count; - size_t max_data = map->max_raw_write - map->format.reg_bytes - - map->format.pad_bytes; int ret, i; if (!val_count) @@ -2007,8 +2005,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, if (map->use_single_write) chunk_regs = 1; - else if (map->max_raw_write && val_len > max_data) - chunk_regs = max_data / val_bytes; + else if (map->max_raw_write && val_len > map->max_raw_write) + chunk_regs = map->max_raw_write / val_bytes; chunk_count = val_count / chunk_regs; chunk_bytes = chunk_regs * val_bytes; From 3e412b6e2b57658905b1c89ddec05504f4342232 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 12 Jul 2023 12:16:40 +0100 Subject: [PATCH 448/509] regmap: Account for register length in SMBus I/O limits commit 0c9d2eb5e94792fe64019008a04d4df5e57625af upstream. The SMBus I2C buses have limits on the size of transfers they can do but do not factor in the register length meaning we may try to do a transfer longer than our length limit, the core will not take care of this. Future changes will factor this out into the core but there are a number of users that assume current behaviour so let's just do something conservative here. This does not take account padding bits but practically speaking these are very rarely if ever used on I2C buses given that they generally run slowly enough to mean there's no issue. Cc: stable@kernel.org Signed-off-by: Mark Brown Reviewed-by: Xu Yilun Link: https://lore.kernel.org/r/20230712-regmap-max-transfer-v1-2-80e2aed22e83@kernel.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-i2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 62b95a9212ae..051c10e730f9 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -242,8 +242,8 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg, static const struct regmap_bus regmap_i2c_smbus_i2c_block = { .write = regmap_i2c_smbus_i2c_write, .read = regmap_i2c_smbus_i2c_read, - .max_raw_read = I2C_SMBUS_BLOCK_MAX, - .max_raw_write = I2C_SMBUS_BLOCK_MAX, + .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1, + .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1, }; static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data, @@ -299,8 +299,8 @@ static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg, static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = { .write = regmap_i2c_smbus_i2c_write_reg16, .read = regmap_i2c_smbus_i2c_read_reg16, - .max_raw_read = I2C_SMBUS_BLOCK_MAX, - .max_raw_write = I2C_SMBUS_BLOCK_MAX, + .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2, + .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2, }; static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, From cf254b4f68e480e73dab055014e002b77aed30ed Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 15 Jul 2023 17:25:43 +0800 Subject: [PATCH 449/509] can: bcm: Fix UAF in bcm_proc_show() commit 55c3b96074f3f9b0aee19bf93cd71af7516582bb upstream. BUG: KASAN: slab-use-after-free in bcm_proc_show+0x969/0xa80 Read of size 8 at addr ffff888155846230 by task cat/7862 CPU: 1 PID: 7862 Comm: cat Not tainted 6.5.0-rc1-00153-gc8746099c197 #230 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Call Trace: dump_stack_lvl+0xd5/0x150 print_report+0xc1/0x5e0 kasan_report+0xba/0xf0 bcm_proc_show+0x969/0xa80 seq_read_iter+0x4f6/0x1260 seq_read+0x165/0x210 proc_reg_read+0x227/0x300 vfs_read+0x1d5/0x8d0 ksys_read+0x11e/0x240 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd Allocated by task 7846: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 __kasan_kmalloc+0x9e/0xa0 bcm_sendmsg+0x264b/0x44e0 sock_sendmsg+0xda/0x180 ____sys_sendmsg+0x735/0x920 ___sys_sendmsg+0x11d/0x1b0 __sys_sendmsg+0xfa/0x1d0 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 7846: kasan_save_stack+0x1e/0x40 kasan_set_track+0x21/0x30 kasan_save_free_info+0x27/0x40 ____kasan_slab_free+0x161/0x1c0 slab_free_freelist_hook+0x119/0x220 __kmem_cache_free+0xb4/0x2e0 rcu_core+0x809/0x1bd0 bcm_op is freed before procfs entry be removed in bcm_release(), this lead to bcm_proc_show() may read the freed bcm_op. Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Signed-off-by: YueHaibing Reviewed-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20230715092543.15548-1-yuehaibing@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/bcm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index ddba4e12da78..2388c619f29c 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1521,6 +1521,12 @@ static int bcm_release(struct socket *sock) lock_sock(sk); +#if IS_ENABLED(CONFIG_PROC_FS) + /* remove procfs entry */ + if (net->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, net->can.bcmproc_dir); +#endif /* CONFIG_PROC_FS */ + list_for_each_entry_safe(op, next, &bo->tx_ops, list) bcm_remove_op(op); @@ -1556,12 +1562,6 @@ static int bcm_release(struct socket *sock) list_for_each_entry_safe(op, next, &bo->rx_ops, list) bcm_remove_op(op); -#if IS_ENABLED(CONFIG_PROC_FS) - /* remove procfs entry */ - if (net->can.bcmproc_dir && bo->bcm_proc_read) - remove_proc_entry(bo->procname, net->can.bcmproc_dir); -#endif /* CONFIG_PROC_FS */ - /* remove device reference */ if (bo->bound) { bo->bound = 0; From 105275879a80503686a8108af2f5c579a1c5aef4 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 11 Jul 2023 11:20:43 +0200 Subject: [PATCH 450/509] drm/client: Fix memory leak in drm_client_target_cloned commit c2a88e8bdf5f6239948d75283d0ae7e0c7945b03 upstream. dmt_mode is allocated and never freed in this function. It was found with the ast driver, but most drivers using generic fbdev setup are probably affected. This fixes the following kmemleak report: backtrace: [<00000000b391296d>] drm_mode_duplicate+0x45/0x220 [drm] [<00000000e45bb5b3>] drm_client_target_cloned.constprop.0+0x27b/0x480 [drm] [<00000000ed2d3a37>] drm_client_modeset_probe+0x6bd/0xf50 [drm] [<0000000010e5cc9d>] __drm_fb_helper_initial_config_and_unlock+0xb4/0x2c0 [drm_kms_helper] [<00000000909f82ca>] drm_fbdev_client_hotplug+0x2bc/0x4d0 [drm_kms_helper] [<00000000063a69aa>] drm_client_register+0x169/0x240 [drm] [<00000000a8c61525>] ast_pci_probe+0x142/0x190 [ast] [<00000000987f19bb>] local_pci_probe+0xdc/0x180 [<000000004fca231b>] work_for_cpu_fn+0x4e/0xa0 [<0000000000b85301>] process_one_work+0x8b7/0x1540 [<000000003375b17c>] worker_thread+0x70a/0xed0 [<00000000b0d43cd9>] kthread+0x29f/0x340 [<000000008d770833>] ret_from_fork+0x1f/0x30 unreferenced object 0xff11000333089a00 (size 128): cc: Fixes: 1d42bbc8f7f9 ("drm/fbdev: fix cloning on fbcon") Reported-by: Zhang Yi Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230711092203.68157-2-jfalempe@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_client_modeset.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index b7e9e1c2564c..844a60ad96bf 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -308,6 +308,9 @@ static bool drm_client_target_cloned(struct drm_device *dev, can_clone = true; dmt_mode = drm_mode_find_dmt(dev, 1024, 768, 60, false); + if (!dmt_mode) + goto fail; + for (i = 0; i < connector_count; i++) { if (!enabled[i]) continue; @@ -323,11 +326,13 @@ static bool drm_client_target_cloned(struct drm_device *dev, if (!modes[i]) can_clone = false; } + kfree(dmt_mode); if (can_clone) { DRM_DEBUG_KMS("can clone using 1024x768\n"); return true; } +fail: DRM_INFO("kms: can't enable cloning when we probably wanted to.\n"); return false; } From 5f2a12f64347f535c6ef55fa7eb36a2874d69b59 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 11 Jul 2023 11:20:44 +0200 Subject: [PATCH 451/509] drm/client: Fix memory leak in drm_client_modeset_probe commit 2329cc7a101af1a844fbf706c0724c0baea38365 upstream. When a new mode is set to modeset->mode, the previous mode should be freed. This fixes the following kmemleak report: drm_mode_duplicate+0x45/0x220 [drm] drm_client_modeset_probe+0x944/0xf50 [drm] __drm_fb_helper_initial_config_and_unlock+0xb4/0x2c0 [drm_kms_helper] drm_fbdev_client_hotplug+0x2bc/0x4d0 [drm_kms_helper] drm_client_register+0x169/0x240 [drm] ast_pci_probe+0x142/0x190 [ast] local_pci_probe+0xdc/0x180 work_for_cpu_fn+0x4e/0xa0 process_one_work+0x8b7/0x1540 worker_thread+0x70a/0xed0 kthread+0x29f/0x340 ret_from_fork+0x1f/0x30 cc: Reported-by: Zhang Yi Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230711092203.68157-3-jfalempe@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_client_modeset.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 844a60ad96bf..d5fd41823624 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -864,6 +864,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, break; } + kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; From 48aa5393758493fe1069ccca4bd4abac06fc3976 Mon Sep 17 00:00:00 2001 From: Matus Gajdos Date: Wed, 12 Jul 2023 14:49:33 +0200 Subject: [PATCH 452/509] ASoC: fsl_sai: Disable bit clock with transmitter commit 269f399dc19f0e5c51711c3ba3bd06e0ef6ef403 upstream. Otherwise bit clock remains running writing invalid data to the DAC. Signed-off-by: Matus Gajdos Acked-by: Shengjiu Wang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230712124934.32232-1-matuszpd@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/fsl/fsl_sai.c | 2 +- sound/soc/fsl/fsl_sai.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 6a5d2b08e271..03731d14d475 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -552,7 +552,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir) u32 xcsr, count = 100; regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs), - FSL_SAI_CSR_TERE, 0); + FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0); /* TERE will remain set till the end of current frame */ do { diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 8923c680f0e0..691847d54b17 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -87,6 +87,7 @@ /* SAI Transmit/Receive Control Register */ #define FSL_SAI_CSR_TERE BIT(31) #define FSL_SAI_CSR_SE BIT(30) +#define FSL_SAI_CSR_BCE BIT(28) #define FSL_SAI_CSR_FR BIT(25) #define FSL_SAI_CSR_SR BIT(24) #define FSL_SAI_CSR_xF_SHIFT 16 From 5d5aa5b64887689d73ef7a4d5c17c2dfcfb26812 Mon Sep 17 00:00:00 2001 From: Eric Whitney Date: Mon, 22 May 2023 14:15:20 -0400 Subject: [PATCH 453/509] ext4: correct inline offset when handling xattrs in inode body commit 6909cf5c4101214f4305a62d582a5b93c7e1eb9a upstream. When run on a file system where the inline_data feature has been enabled, xfstests generic/269, generic/270, and generic/476 cause ext4 to emit error messages indicating that inline directory entries are corrupted. This occurs because the inline offset used to locate inline directory entries in the inode body is not updated when an xattr in that shared region is deleted and the region is shifted in memory to recover the space it occupied. If the deleted xattr precedes the system.data attribute, which points to the inline directory entries, that attribute will be moved further up in the region. The inline offset continues to point to whatever is located in system.data's former location, with unfortunate effects when used to access directory entries or (presumably) inline data in the inode body. Cc: stable@kernel.org Signed-off-by: Eric Whitney Link: https://lore.kernel.org/r/20230522181520.1570360-1-enwlinux@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/xattr.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index ed39101dc7b6..5bf858fc271f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1725,6 +1725,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, memmove(here, (void *)here + size, (void *)last - (void *)here + sizeof(__u32)); memset(last, 0, size); + + /* + * Update i_inline_off - moved ibody region might contain + * system.data attribute. Handling a failure here won't + * cause other complications for setting an xattr. + */ + if (!is_block && ext4_has_inline_data(inode)) { + ret = ext4_find_inline_data_nolock(inode); + if (ret) { + ext4_warning_inode(inode, + "unable to update i_inline_off"); + goto out; + } + } } else if (s->not_found) { /* Insert new name. */ size_t size = EXT4_XATTR_LEN(name_len); From 5f84a34b646f6e52fa8d39bd6f586264c0e68703 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 7 Jun 2023 19:19:02 +0900 Subject: [PATCH 454/509] debugobjects: Recheck debug_objects_enabled before reporting [ Upstream commit 8b64d420fe2450f82848178506d3e3a0bd195539 ] syzbot is reporting false a positive ODEBUG message immediately after ODEBUG was disabled due to OOM. [ 1062.309646][T22911] ODEBUG: Out of memory. ODEBUG disabled [ 1062.886755][ T5171] ------------[ cut here ]------------ [ 1062.892770][ T5171] ODEBUG: assert_init not available (active state 0) object: ffffc900056afb20 object type: timer_list hint: process_timeout+0x0/0x40 CPU 0 [ T5171] CPU 1 [T22911] -------------- -------------- debug_object_assert_init() { if (!debug_objects_enabled) return; db = get_bucket(addr); lookup_object_or_alloc() { debug_objects_enabled = 0; return NULL; } debug_objects_oom() { pr_warn("Out of memory. ODEBUG disabled\n"); // all buckets get emptied here, and } lookup_object_or_alloc(addr, db, descr, false, true) { // this bucket is already empty. return ERR_PTR(-ENOENT); } // Emits false positive warning. debug_print_object(&o, "assert_init"); } Recheck debug_object_enabled in debug_print_object() to avoid that. Reported-by: syzbot Suggested-by: Thomas Gleixner Signed-off-by: Tetsuo Handa Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/492fe2ae-5141-d548-ebd5-62f5fe2e57f7@I-love.SAKURA.ne.jp Closes: https://syzkaller.appspot.com/bug?extid=7937ba6a50bdd00fffdf Signed-off-by: Sasha Levin --- lib/debugobjects.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 4c39678c03ee..4dd9283f6fea 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -501,6 +501,15 @@ static void debug_print_object(struct debug_obj *obj, char *msg) const struct debug_obj_descr *descr = obj->descr; static int limit; + /* + * Don't report if lookup_object_or_alloc() by the current thread + * failed because lookup_object_or_alloc()/debug_objects_oom() by a + * concurrent thread turned off debug_objects_enabled and cleared + * the hash buckets. + */ + if (!debug_objects_enabled) + return; + if (limit < 5 && descr != descr_test) { void *hint = descr->debug_hint ? descr->debug_hint(obj->object) : NULL; From 4a2c62c8d67cdbf42f217744ffa0fd9d077a589f Mon Sep 17 00:00:00 2001 From: Zhong Jinghua Date: Mon, 5 Jun 2023 20:21:59 +0800 Subject: [PATCH 455/509] nbd: Add the maximum limit of allocated index in nbd_dev_add [ Upstream commit f12bc113ce904777fd6ca003b473b427782b3dde ] If the index allocated by idr_alloc greater than MINORMASK >> part_shift, the device number will overflow, resulting in failure to create a block device. Fix it by imiting the size of the max allocation. Signed-off-by: Zhong Jinghua Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230605122159.2134384-1-zhongjinghua@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b6940f0a9c90..e0f805ca0e72 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1723,7 +1723,8 @@ static int nbd_dev_add(int index) if (err == -ENOSPC) err = -EEXIST; } else { - err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL); + err = idr_alloc(&nbd_index_idr, nbd, 0, + (MINORMASK >> part_shift) + 1, GFP_KERNEL); if (err >= 0) index = err; } From b02939413e5c9bbea58e220c1e9733930edb6d1a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 12 May 2023 09:56:07 +0800 Subject: [PATCH 456/509] md: fix data corruption for raid456 when reshape restart while grow up [ Upstream commit 873f50ece41aad5c4f788a340960c53774b5526e ] Currently, if reshape is interrupted, echo "reshape" to sync_action will restart reshape from scratch, for example: echo frozen > sync_action echo reshape > sync_action This will corrupt data before reshape_position if the array is growing, fix the problem by continue reshape from reshape_position. Reported-by: Peter Neuwirth Link: https://lore.kernel.org/linux-raid/e2f96772-bfbc-f43b-6da1-f520e5164536@online.de/ Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230512015610.821290-3-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/md.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bbf39abc32b7..ae0a857d6076 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4890,11 +4890,21 @@ action_store(struct mddev *mddev, const char *page, size_t len) return -EINVAL; err = mddev_lock(mddev); if (!err) { - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { err = -EBUSY; - else { + } else if (mddev->reshape_position == MaxSector || + mddev->pers->check_reshape == NULL || + mddev->pers->check_reshape(mddev)) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); err = mddev->pers->start_reshape(mddev); + } else { + /* + * If reshape is still in progress, and + * md_check_recovery() can continue to reshape, + * don't restart reshape because data can be + * corrupted for raid456. + */ + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); } mddev_unlock(mddev); } From 634daf6b2c81015cc5e28bf694a6a94a50c641cd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 29 May 2023 21:11:00 +0800 Subject: [PATCH 457/509] md/raid10: prevent soft lockup while flush writes [ Upstream commit 010444623e7f4da6b4a4dd603a7da7469981e293 ] Currently, there is no limit for raid1/raid10 plugged bio. While flushing writes, raid1 has cond_resched() while raid10 doesn't, and too many writes can cause soft lockup. Follow up soft lockup can be triggered easily with writeback test for raid10 with ramdisks: watchdog: BUG: soft lockup - CPU#10 stuck for 27s! [md0_raid10:1293] Call Trace: call_rcu+0x16/0x20 put_object+0x41/0x80 __delete_object+0x50/0x90 delete_object_full+0x2b/0x40 kmemleak_free+0x46/0xa0 slab_free_freelist_hook.constprop.0+0xed/0x1a0 kmem_cache_free+0xfd/0x300 mempool_free_slab+0x1f/0x30 mempool_free+0x3a/0x100 bio_free+0x59/0x80 bio_put+0xcf/0x2c0 free_r10bio+0xbf/0xf0 raid_end_bio_io+0x78/0xb0 one_write_done+0x8a/0xa0 raid10_end_write_request+0x1b4/0x430 bio_endio+0x175/0x320 brd_submit_bio+0x3b9/0x9b7 [brd] __submit_bio+0x69/0xe0 submit_bio_noacct_nocheck+0x1e6/0x5a0 submit_bio_noacct+0x38c/0x7e0 flush_pending_writes+0xf0/0x240 raid10d+0xac/0x1ed0 Fix the problem by adding cond_resched() to raid10 like what raid1 did. Note that unlimited plugged bio still need to be optimized, for example, in the case of lots of dirty pages writeback, this will take lots of memory and io will spend a long time in plug, hence io latency is bad. Signed-off-by: Yu Kuai Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230529131106.2123367-2-yukuai1@huaweicloud.com Signed-off-by: Sasha Levin --- drivers/md/raid10.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 32a917e5103a..55144f7d9303 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -902,6 +902,7 @@ static void flush_pending_writes(struct r10conf *conf) else submit_bio_noacct(bio); bio = next; + cond_resched(); } blk_finish_plug(&plug); } else @@ -1095,6 +1096,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) else submit_bio_noacct(bio); bio = next; + cond_resched(); } kfree(plug); } From 322377cc909defcca9451487484845e7e1d20d1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 1 Jun 2023 20:58:47 +0200 Subject: [PATCH 458/509] posix-timers: Ensure timer ID search-loop limit is valid [ Upstream commit 8ce8849dd1e78dadcee0ec9acbd259d239b7069f ] posix_timer_add() tries to allocate a posix timer ID by starting from the cached ID which was stored by the last successful allocation. This is done in a loop searching the ID space for a free slot one by one. The loop has to terminate when the search wrapped around to the starting point. But that's racy vs. establishing the starting point. That is read out lockless, which leads to the following problem: CPU0 CPU1 posix_timer_add() start = sig->posix_timer_id; lock(hash_lock); ... posix_timer_add() if (++sig->posix_timer_id < 0) start = sig->posix_timer_id; sig->posix_timer_id = 0; So CPU1 can observe a negative start value, i.e. -1, and the loop break never happens because the condition can never be true: if (sig->posix_timer_id == start) break; While this is unlikely to ever turn into an endless loop as the ID space is huge (INT_MAX), the racy read of the start value caught the attention of KCSAN and Dmitry unearthed that incorrectness. Rewrite it so that all id operations are under the hash lock. Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx Signed-off-by: Sasha Levin --- include/linux/sched/signal.h | 2 +- kernel/time/posix-timers.c | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ae60f838ebb9..2c634010cc7b 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -125,7 +125,7 @@ struct signal_struct { #ifdef CONFIG_POSIX_TIMERS /* POSIX.1b Interval Timers */ - int posix_timer_id; + unsigned int next_posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 6d12a724d2b6..29569b1c3d8c 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -140,25 +140,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id) static int posix_timer_add(struct k_itimer *timer) { struct signal_struct *sig = current->signal; - int first_free_id = sig->posix_timer_id; struct hlist_head *head; - int ret = -ENOENT; + unsigned int cnt, id; - do { + /* + * FIXME: Replace this by a per signal struct xarray once there is + * a plan to handle the resulting CRIU regression gracefully. + */ + for (cnt = 0; cnt <= INT_MAX; cnt++) { spin_lock(&hash_lock); - head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; - if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { + id = sig->next_posix_timer_id; + + /* Write the next ID back. Clamp it to the positive space */ + sig->next_posix_timer_id = (id + 1) & INT_MAX; + + head = &posix_timers_hashtable[hash(sig, id)]; + if (!__posix_timers_find(head, sig, id)) { hlist_add_head_rcu(&timer->t_hash, head); - ret = sig->posix_timer_id; + spin_unlock(&hash_lock); + return id; } - if (++sig->posix_timer_id < 0) - sig->posix_timer_id = 0; - if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) - /* Loop over all possible ids completed */ - ret = -EAGAIN; spin_unlock(&hash_lock); - } while (ret == -ENOENT); - return ret; + } + /* POSIX return code when no timer ID could be allocated */ + return -EAGAIN; } static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) From ae51eb90bcca2f8a9e4b76fe35cc76df0d915621 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 4 Apr 2023 00:06:02 +0200 Subject: [PATCH 459/509] btrfs: add xxhash to fast checksum implementations [ Upstream commit efcfcbc6a36195c42d98e0ee697baba36da94dc8 ] The implementation of XXHASH is now CPU only but still fast enough to be considered for the synchronous checksumming, like non-generic crc32c. A userspace benchmark comparing it to various implementations (patched hash-speedtest from btrfs-progs): Block size: 4096 Iterations: 1000000 Implementation: builtin Units: CPU cycles NULL-NOP: cycles: 73384294, cycles/i 73 NULL-MEMCPY: cycles: 228033868, cycles/i 228, 61664.320 MiB/s CRC32C-ref: cycles: 24758559416, cycles/i 24758, 567.950 MiB/s CRC32C-NI: cycles: 1194350470, cycles/i 1194, 11773.433 MiB/s CRC32C-ADLERSW: cycles: 6150186216, cycles/i 6150, 2286.372 MiB/s CRC32C-ADLERHW: cycles: 626979180, cycles/i 626, 22427.453 MiB/s CRC32C-PCL: cycles: 466746732, cycles/i 466, 30126.699 MiB/s XXHASH: cycles: 860656400, cycles/i 860, 16338.188 MiB/s Comparing purely software implementation (ref), current outdated accelerated using crc32q instruction (NI), optimized implementations by M. Adler (https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software/17646775#17646775) and the best one that was taken from kernel using the PCLMULQDQ instruction (PCL). Reviewed-by: Christoph Hellwig Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a114cad988a..608b939a4d28 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2256,6 +2256,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; + case BTRFS_CSUM_TYPE_XXHASH: + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); + break; default: break; } From e090f70ae4cc1805c8442e719e40e7e0fbb964f1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 29 Apr 2023 12:38:41 +0200 Subject: [PATCH 460/509] ACPI: button: Add lid disable DMI quirk for Nextbook Ares 8A [ Upstream commit 4fd5556608bfa9c2bf276fc115ef04288331aded ] The LID0 device on the Nextbook Ares 8A tablet always reports lid closed causing userspace to suspend the device as soon as booting is complete. Add a DMI quirk to disable the broken lid functionality. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/button.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 0d93a5ef4d07..4861aad1a9e9 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -82,6 +82,15 @@ static const struct dmi_system_id dmi_lid_quirks[] = { }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED, }, + { + /* Nextbook Ares 8A tablet, _LID device always reports lid closed */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"), + DMI_MATCH(DMI_BIOS_VERSION, "M882"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_DISABLED, + }, { /* * Medion Akoya E2215T, notification of the LID device only From 776a72f612a82ea0ea7fc5727226e3f57ea82545 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 May 2023 11:23:58 +0200 Subject: [PATCH 461/509] ACPI: video: Add backlight=native DMI quirk for Apple iMac11,3 [ Upstream commit 48436f2e9834b46b47b038b605c8142a1c07bc85 ] Linux defaults to picking the non-working ACPI video backlight interface on the Apple iMac11,3 . Add a DMI quirk to pick the working native radeon_bl0 interface instead. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 038542b3a80a..872b5351f0d8 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -332,6 +332,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82BK"), }, }, + { + .callback = video_detect_force_native, + /* Apple iMac11,3 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac11,3"), + }, + }, { /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */ .callback = video_detect_force_native, From 71e3f2354072c98921f9a3f89f3aad171c3575c6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 17 May 2023 11:23:59 +0200 Subject: [PATCH 462/509] ACPI: video: Add backlight=native DMI quirk for Lenovo ThinkPad X131e (3371 AMD version) [ Upstream commit bd5d93df86a7ddf98a2a37e9c3751e3cb334a66c ] Linux defaults to picking the non-working ACPI video backlight interface on the Lenovo ThinkPad X131e (3371 AMD version). Add a DMI quirk to pick the working native radeon_bl0 interface instead. Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/video_detect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 872b5351f0d8..b02d381e7848 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -332,6 +332,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "82BK"), }, }, + { + .callback = video_detect_force_native, + /* Lenovo ThinkPad X131e (3371 AMD version) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "3371"), + }, + }, { .callback = video_detect_force_native, /* Apple iMac11,3 */ From c71d6934c6ac40a97146a410e0320768c7b1bb3c Mon Sep 17 00:00:00 2001 From: Youngmin Nam Date: Mon, 24 Apr 2023 10:04:36 +0900 Subject: [PATCH 463/509] arm64: set __exception_irq_entry with __irq_entry as a default [ Upstream commit f6794950f0e5ba37e3bbedda4d6ab0aad7395dd3 ] filter_irq_stacks() is supposed to cut entries which are related irq entries from its call stack. And in_irqentry_text() which is called by filter_irq_stacks() uses __irqentry_text_start/end symbol to find irq entries in callstack. But it doesn't work correctly as without "CONFIG_FUNCTION_GRAPH_TRACER", arm64 kernel doesn't include gic_handle_irq which is entry point of arm64 irq between __irqentry_text_start and __irqentry_text_end as we discussed in below link. https://lore.kernel.org/all/CACT4Y+aReMGLYua2rCLHgFpS9io5cZC04Q8GLs-uNmrn1ezxYQ@mail.gmail.com/#t This problem can makes unintentional deep call stack entries especially in KASAN enabled situation as below. [ 2479.383395]I[0:launcher-loader: 1719] Stack depot reached limit capacity [ 2479.383538]I[0:launcher-loader: 1719] WARNING: CPU: 0 PID: 1719 at lib/stackdepot.c:129 __stack_depot_save+0x464/0x46c [ 2479.385693]I[0:launcher-loader: 1719] pstate: 624000c5 (nZCv daIF +PAN -UAO +TCO -DIT -SSBS BTYPE=--) [ 2479.385724]I[0:launcher-loader: 1719] pc : __stack_depot_save+0x464/0x46c [ 2479.385751]I[0:launcher-loader: 1719] lr : __stack_depot_save+0x460/0x46c [ 2479.385774]I[0:launcher-loader: 1719] sp : ffffffc0080073c0 [ 2479.385793]I[0:launcher-loader: 1719] x29: ffffffc0080073e0 x28: ffffffd00b78a000 x27: 0000000000000000 [ 2479.385839]I[0:launcher-loader: 1719] x26: 000000000004d1dd x25: ffffff891474f000 x24: 00000000ca64d1dd [ 2479.385882]I[0:launcher-loader: 1719] x23: 0000000000000200 x22: 0000000000000220 x21: 0000000000000040 [ 2479.385925]I[0:launcher-loader: 1719] x20: ffffffc008007440 x19: 0000000000000000 x18: 0000000000000000 [ 2479.385969]I[0:launcher-loader: 1719] x17: 2065726568207475 x16: 000000000000005e x15: 2d2d2d2d2d2d2d20 [ 2479.386013]I[0:launcher-loader: 1719] x14: 5d39313731203a72 x13: 00000000002f6b30 x12: 00000000002f6af8 [ 2479.386057]I[0:launcher-loader: 1719] x11: 00000000ffffffff x10: ffffffb90aacf000 x9 : e8a74a6c16008800 [ 2479.386101]I[0:launcher-loader: 1719] x8 : e8a74a6c16008800 x7 : 00000000002f6b30 x6 : 00000000002f6af8 [ 2479.386145]I[0:launcher-loader: 1719] x5 : ffffffc0080070c8 x4 : ffffffd00b192380 x3 : ffffffd0092b313c [ 2479.386189]I[0:launcher-loader: 1719] x2 : 0000000000000001 x1 : 0000000000000004 x0 : 0000000000000022 [ 2479.386231]I[0:launcher-loader: 1719] Call trace: [ 2479.386248]I[0:launcher-loader: 1719] __stack_depot_save+0x464/0x46c [ 2479.386273]I[0:launcher-loader: 1719] kasan_save_stack+0x58/0x70 [ 2479.386303]I[0:launcher-loader: 1719] save_stack_info+0x34/0x138 [ 2479.386331]I[0:launcher-loader: 1719] kasan_save_free_info+0x18/0x24 [ 2479.386358]I[0:launcher-loader: 1719] ____kasan_slab_free+0x16c/0x170 [ 2479.386385]I[0:launcher-loader: 1719] __kasan_slab_free+0x10/0x20 [ 2479.386410]I[0:launcher-loader: 1719] kmem_cache_free+0x238/0x53c [ 2479.386435]I[0:launcher-loader: 1719] mempool_free_slab+0x1c/0x28 [ 2479.386460]I[0:launcher-loader: 1719] mempool_free+0x7c/0x1a0 [ 2479.386484]I[0:launcher-loader: 1719] bvec_free+0x34/0x80 [ 2479.386514]I[0:launcher-loader: 1719] bio_free+0x60/0x98 [ 2479.386540]I[0:launcher-loader: 1719] bio_put+0x50/0x21c [ 2479.386567]I[0:launcher-loader: 1719] f2fs_write_end_io+0x4ac/0x4d0 [ 2479.386594]I[0:launcher-loader: 1719] bio_endio+0x2dc/0x300 [ 2479.386622]I[0:launcher-loader: 1719] __dm_io_complete+0x324/0x37c [ 2479.386650]I[0:launcher-loader: 1719] dm_io_dec_pending+0x60/0xa4 [ 2479.386676]I[0:launcher-loader: 1719] clone_endio+0xf8/0x2f0 [ 2479.386700]I[0:launcher-loader: 1719] bio_endio+0x2dc/0x300 [ 2479.386727]I[0:launcher-loader: 1719] blk_update_request+0x258/0x63c [ 2479.386754]I[0:launcher-loader: 1719] scsi_end_request+0x50/0x304 [ 2479.386782]I[0:launcher-loader: 1719] scsi_io_completion+0x88/0x160 [ 2479.386808]I[0:launcher-loader: 1719] scsi_finish_command+0x17c/0x194 [ 2479.386833]I[0:launcher-loader: 1719] scsi_complete+0xcc/0x158 [ 2479.386859]I[0:launcher-loader: 1719] blk_mq_complete_request+0x4c/0x5c [ 2479.386885]I[0:launcher-loader: 1719] scsi_done_internal+0xf4/0x1e0 [ 2479.386910]I[0:launcher-loader: 1719] scsi_done+0x14/0x20 [ 2479.386935]I[0:launcher-loader: 1719] ufshcd_compl_one_cqe+0x578/0x71c [ 2479.386963]I[0:launcher-loader: 1719] ufshcd_mcq_poll_cqe_nolock+0xc8/0x150 [ 2479.386991]I[0:launcher-loader: 1719] ufshcd_intr+0x868/0xc0c [ 2479.387017]I[0:launcher-loader: 1719] __handle_irq_event_percpu+0xd0/0x348 [ 2479.387044]I[0:launcher-loader: 1719] handle_irq_event_percpu+0x24/0x74 [ 2479.387068]I[0:launcher-loader: 1719] handle_irq_event+0x74/0xe0 [ 2479.387091]I[0:launcher-loader: 1719] handle_fasteoi_irq+0x174/0x240 [ 2479.387118]I[0:launcher-loader: 1719] handle_irq_desc+0x7c/0x2c0 [ 2479.387147]I[0:launcher-loader: 1719] generic_handle_domain_irq+0x1c/0x28 [ 2479.387174]I[0:launcher-loader: 1719] gic_handle_irq+0x64/0x158 [ 2479.387204]I[0:launcher-loader: 1719] call_on_irq_stack+0x2c/0x54 [ 2479.387231]I[0:launcher-loader: 1719] do_interrupt_handler+0x70/0xa0 [ 2479.387258]I[0:launcher-loader: 1719] el1_interrupt+0x34/0x68 [ 2479.387283]I[0:launcher-loader: 1719] el1h_64_irq_handler+0x18/0x24 [ 2479.387308]I[0:launcher-loader: 1719] el1h_64_irq+0x68/0x6c [ 2479.387332]I[0:launcher-loader: 1719] blk_attempt_bio_merge+0x8/0x170 [ 2479.387356]I[0:launcher-loader: 1719] blk_mq_attempt_bio_merge+0x78/0x98 [ 2479.387383]I[0:launcher-loader: 1719] blk_mq_submit_bio+0x324/0xa40 [ 2479.387409]I[0:launcher-loader: 1719] __submit_bio+0x104/0x138 [ 2479.387436]I[0:launcher-loader: 1719] submit_bio_noacct_nocheck+0x1d0/0x4a0 [ 2479.387462]I[0:launcher-loader: 1719] submit_bio_noacct+0x618/0x804 [ 2479.387487]I[0:launcher-loader: 1719] submit_bio+0x164/0x180 [ 2479.387511]I[0:launcher-loader: 1719] f2fs_submit_read_bio+0xe4/0x1c4 [ 2479.387537]I[0:launcher-loader: 1719] f2fs_mpage_readpages+0x888/0xa4c [ 2479.387563]I[0:launcher-loader: 1719] f2fs_readahead+0xd4/0x19c [ 2479.387587]I[0:launcher-loader: 1719] read_pages+0xb0/0x4ac [ 2479.387614]I[0:launcher-loader: 1719] page_cache_ra_unbounded+0x238/0x288 [ 2479.387642]I[0:launcher-loader: 1719] do_page_cache_ra+0x60/0x6c [ 2479.387669]I[0:launcher-loader: 1719] page_cache_ra_order+0x318/0x364 [ 2479.387695]I[0:launcher-loader: 1719] ondemand_readahead+0x30c/0x3d8 [ 2479.387722]I[0:launcher-loader: 1719] page_cache_sync_ra+0xb4/0xc8 [ 2479.387749]I[0:launcher-loader: 1719] filemap_read+0x268/0xd24 [ 2479.387777]I[0:launcher-loader: 1719] f2fs_file_read_iter+0x1a0/0x62c [ 2479.387806]I[0:launcher-loader: 1719] vfs_read+0x258/0x34c [ 2479.387831]I[0:launcher-loader: 1719] ksys_pread64+0x8c/0xd0 [ 2479.387857]I[0:launcher-loader: 1719] __arm64_sys_pread64+0x48/0x54 [ 2479.387881]I[0:launcher-loader: 1719] invoke_syscall+0x58/0x158 [ 2479.387909]I[0:launcher-loader: 1719] el0_svc_common+0xf0/0x134 [ 2479.387935]I[0:launcher-loader: 1719] do_el0_svc+0x44/0x114 [ 2479.387961]I[0:launcher-loader: 1719] el0_svc+0x2c/0x80 [ 2479.387985]I[0:launcher-loader: 1719] el0t_64_sync_handler+0x48/0x114 [ 2479.388010]I[0:launcher-loader: 1719] el0t_64_sync+0x190/0x194 [ 2479.388038]I[0:launcher-loader: 1719] Kernel panic - not syncing: kernel: panic_on_warn set ... So let's set __exception_irq_entry with __irq_entry as a default. Applying this patch, we can see gic_hande_irq is included in Systemp.map as below. * Before ffffffc008010000 T __do_softirq ffffffc008010000 T __irqentry_text_end ffffffc008010000 T __irqentry_text_start ffffffc008010000 T __softirqentry_text_start ffffffc008010000 T _stext ffffffc00801066c T __softirqentry_text_end ffffffc008010670 T __entry_text_start * After ffffffc008010000 T __irqentry_text_start ffffffc008010000 T _stext ffffffc008010000 t gic_handle_irq ffffffc00801013c t gic_handle_irq ffffffc008010294 T __irqentry_text_end ffffffc008010298 T __do_softirq ffffffc008010298 T __softirqentry_text_start ffffffc008010904 T __softirqentry_text_end ffffffc008010908 T __entry_text_start Signed-off-by: Youngmin Nam Signed-off-by: SEO HOYOUNG Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20230424010436.779733-1-youngmin.nam@samsung.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/exception.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0756191f44f6..59c3facb8a56 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,16 +8,11 @@ #define __ASM_EXCEPTION_H #include -#include #include #include -#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __kprobes -#endif static inline u32 disr_to_esr(u64 disr) { From 32020fc2a8373d3de35ae6d029d5969a42651e7a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Jun 2023 11:26:28 +0100 Subject: [PATCH 464/509] arm64: mm: fix VA-range sanity check [ Upstream commit ab9b4008092c86dc12497af155a0901cc1156999 ] Both create_mapping_noalloc() and update_mapping_prot() sanity-check their 'virt' parameter, but the check itself doesn't make much sense. The condition used today appears to be a historical accident. The sanity-check condition: if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { [ ... warning here ... ] return; } ... can only be true for the KASAN shadow region or the module region, and there's no reason to exclude these specifically for creating and updateing mappings. When arm64 support was first upstreamed in commit: c1cc1552616d0f35 ("arm64: MMU initialisation") ... the condition was: if (virt < VMALLOC_START) { [ ... warning here ... ] return; } At the time, VMALLOC_START was the lowest kernel address, and this was checking whether 'virt' would be translated via TTBR1. Subsequently in commit: 14c127c957c1c607 ("arm64: mm: Flip kernel VA space") ... the condition was changed to: if ((virt >= VA_START) && (virt < VMALLOC_START)) { [ ... warning here ... ] return; } This appear to have been a thinko. The commit moved the linear map to the bottom of the kernel address space, with VMALLOC_START being at the halfway point. The old condition would warn for changes to the linear map below this, and at the time VA_START was the end of the linear map. Subsequently we cleaned up the naming of VA_START in commit: 77ad4ce69321abbe ("arm64: memory: rename VA_START to PAGE_END") ... keeping the erroneous condition as: if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { [ ... warning here ... ] return; } Correct the condition to check against the start of the TTBR1 address space, which is currently PAGE_OFFSET. This simplifies the logic, and more clearly matches the "outside kernel range" message in the warning. Signed-off-by: Mark Rutland Cc: Russell King Cc: Steve Capper Cc: Will Deacon Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230615102628.1052103-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3284709ef567..78f9fb638c9c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -421,7 +421,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -448,7 +448,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { + if (virt < PAGE_OFFSET) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; From 6b0c79aa33075b34c3cdcea4132c0afb3fc42d68 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Tue, 30 May 2023 16:25:07 +0800 Subject: [PATCH 465/509] sched/fair: Don't balance task to its current running CPU [ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ] We've run into the case that the balancer tries to balance a migration disabled task and trigger the warning in set_task_cpu() like below: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240 Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip> CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G O 6.1.0-rc4+ #1 Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021 pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : set_task_cpu+0x188/0x240 lr : load_balance+0x5d0/0xc60 sp : ffff80000803bc70 x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040 x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001 x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78 x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000 x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000 x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530 x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001 Call trace: set_task_cpu+0x188/0x240 load_balance+0x5d0/0xc60 rebalance_domains+0x26c/0x380 _nohz_idle_balance.isra.0+0x1e0/0x370 run_rebalance_domains+0x6c/0x80 __do_softirq+0x128/0x3d8 ____do_softirq+0x18/0x24 call_on_irq_stack+0x2c/0x38 do_softirq_own_stack+0x24/0x3c __irq_exit_rcu+0xcc/0xf4 irq_exit_rcu+0x18/0x24 el1_interrupt+0x4c/0xe4 el1h_64_irq_handler+0x18/0x2c el1h_64_irq+0x74/0x78 arch_cpu_idle+0x18/0x4c default_idle_call+0x58/0x194 do_idle+0x244/0x2b0 cpu_startup_entry+0x30/0x3c secondary_start_kernel+0x14c/0x190 __secondary_switched+0xb0/0xb4 ---[ end trace 0000000000000000 ]--- Further investigation shows that the warning is superfluous, the migration disabled task is just going to be migrated to its current running CPU. This is because that on load balance if the dst_cpu is not allowed by the task, we'll re-select a new_dst_cpu as a candidate. If no task can be balanced to dst_cpu we'll try to balance the task to the new_dst_cpu instead. In this case when the migration disabled task is not on CPU it only allows to run on its current CPU, load balance will select its current CPU as new_dst_cpu and later triggers the warning above. The new_dst_cpu is chosen from the env->dst_grpmask. Currently it contains CPUs in sched_group_span() and if we have overlapped groups it's possible to run into this case. This patch makes env->dst_grpmask of group_balance_mask() which exclude any CPUs from the busiest group and solve the issue. For balancing in a domain with no overlapped groups the behaviour keeps same as before. Suggested-by: Vincent Guittot Signed-off-by: Yicong Yang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com Signed-off-by: Sasha Levin --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 45c1d03aff73..d53f57ac7609 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9883,7 +9883,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .sd = sd, .dst_cpu = this_cpu, .dst_rq = this_rq, - .dst_grpmask = sched_group_span(sd->groups), + .dst_grpmask = group_balance_mask(sd->groups), .idle = idle, .loop_break = sched_nr_migrate_break, .cpus = cpus, From 532f8bac60419eb28158770470b9bb655de207c8 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Fri, 21 Apr 2023 16:54:45 +0200 Subject: [PATCH 466/509] wifi: ath11k: fix registration of 6Ghz-only phy without the full channel range [ Upstream commit e2ceb1de2f83aafd8003f0b72dfd4b7441e97d14 ] Because of what seems to be a typo, a 6Ghz-only phy for which the BDF does not allow the 7115Mhz channel will fail to register: WARNING: CPU: 2 PID: 106 at net/wireless/core.c:907 wiphy_register+0x914/0x954 Modules linked in: ath11k_pci sbsa_gwdt CPU: 2 PID: 106 Comm: kworker/u8:5 Not tainted 6.3.0-rc7-next-20230418-00549-g1e096a17625a-dirty #9 Hardware name: Freebox V7R Board (DT) Workqueue: ath11k_qmi_driver_event ath11k_qmi_driver_event_work pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : wiphy_register+0x914/0x954 lr : ieee80211_register_hw+0x67c/0xc10 sp : ffffff800b123aa0 x29: ffffff800b123aa0 x28: 0000000000000000 x27: 0000000000000000 x26: 0000000000000000 x25: 0000000000000006 x24: ffffffc008d51418 x23: ffffffc008cb0838 x22: ffffff80176c2460 x21: 0000000000000168 x20: ffffff80176c0000 x19: ffffff80176c03e0 x18: 0000000000000014 x17: 00000000cbef338c x16: 00000000d2a26f21 x15: 00000000ad6bb85f x14: 0000000000000020 x13: 0000000000000020 x12: 00000000ffffffbd x11: 0000000000000208 x10: 00000000fffffdf7 x9 : ffffffc009394718 x8 : ffffff80176c0528 x7 : 000000007fffffff x6 : 0000000000000006 x5 : 0000000000000005 x4 : ffffff800b304284 x3 : ffffff800b304284 x2 : ffffff800b304d98 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: wiphy_register+0x914/0x954 ieee80211_register_hw+0x67c/0xc10 ath11k_mac_register+0x7c4/0xe10 ath11k_core_qmi_firmware_ready+0x1f4/0x570 ath11k_qmi_driver_event_work+0x198/0x590 process_one_work+0x1b8/0x328 worker_thread+0x6c/0x414 kthread+0x100/0x104 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- ath11k_pci 0002:01:00.0: ieee80211 registration failed: -22 ath11k_pci 0002:01:00.0: failed register the radio with mac80211: -22 ath11k_pci 0002:01:00.0: failed to create pdev core: -22 Signed-off-by: Maxime Bizon Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230421145445.2612280-1-mbizon@freebox.fr Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath11k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 67faf62999de..3170c54c97b7 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -6044,7 +6044,7 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar, } if (supported_bands & WMI_HOST_WLAN_5G_CAP) { - if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) { + if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) { channels = kmemdup(ath11k_6ghz_channels, sizeof(ath11k_6ghz_channels), GFP_KERNEL); if (!channels) { From b6d9a4062c944ad095b34dc112bf646a84156f60 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 10 May 2023 21:37:48 -0700 Subject: [PATCH 467/509] bpf: Address KCSAN report on bpf_lru_list [ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ] KCSAN reported a data-race when accessing node->ref. Although node->ref does not have to be accurate, take this chance to use a more common READ_ONCE() and WRITE_ONCE() pattern instead of data_race(). There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref(). This patch also adds bpf_lru_node_clear_ref() to do the WRITE_ONCE(node->ref, 0) also. ================================================================== BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1: __bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline] __bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline] __bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240 bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline] bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline] bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499 prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline] __htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316 bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 __sys_bpf+0x338/0x810 __do_sys_bpf kernel/bpf/syscall.c:5096 [inline] __se_sys_bpf kernel/bpf/syscall.c:5094 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0: bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline] __htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332 bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313 bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200 generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687 bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534 __sys_bpf+0x338/0x810 __do_sys_bpf kernel/bpf/syscall.c:5096 [inline] __se_sys_bpf kernel/bpf/syscall.c:5094 [inline] __x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x01 -> 0x00 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023 ================================================================== Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com Signed-off-by: Martin KaFai Lau Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/bpf_lru_list.c | 21 +++++++++++++-------- kernel/bpf/bpf_lru_list.h | 7 ++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index d99e89f113c4..3dabdd137d10 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) /* bpf_lru_node helpers */ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) { - return node->ref; + return READ_ONCE(node->ref); +} + +static void bpf_lru_node_clear_ref(struct bpf_lru_node *node) +{ + WRITE_ONCE(node->ref, 0); } static void bpf_lru_list_count_inc(struct bpf_lru_list *l, @@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l, bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; - node->ref = 0; + bpf_lru_node_clear_ref(node); list_move(&node->list, &l->lists[tgt_type]); } @@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l, bpf_lru_list_count_inc(l, tgt_type); node->type = tgt_type; } - node->ref = 0; + bpf_lru_node_clear_ref(node); /* If the moving node is the next_inactive_rotation candidate, * move the next_inactive_rotation pointer also. @@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru, *(u32 *)((void *)node + lru->hash_offset) = hash; node->cpu = cpu; node->type = BPF_LRU_LOCAL_LIST_T_PENDING; - node->ref = 0; + bpf_lru_node_clear_ref(node); list_add(&node->list, local_pending_list(loc_l)); } @@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru, if (!list_empty(free_list)) { node = list_first_entry(free_list, struct bpf_lru_node, list); *(u32 *)((void *)node + lru->hash_offset) = hash; - node->ref = 0; + bpf_lru_node_clear_ref(node); __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); } @@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru, } node->type = BPF_LRU_LOCAL_LIST_T_FREE; - node->ref = 0; + bpf_lru_node_clear_ref(node); list_move(&node->list, local_free_list(loc_l)); raw_spin_unlock_irqrestore(&loc_l->lock, flags); @@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf, node = (struct bpf_lru_node *)(buf + node_offset); node->type = BPF_LRU_LIST_T_FREE; - node->ref = 0; + bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); buf += elem_size; } @@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf, node = (struct bpf_lru_node *)(buf + node_offset); node->cpu = cpu; node->type = BPF_LRU_LIST_T_FREE; - node->ref = 0; + bpf_lru_node_clear_ref(node); list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]); i++; buf += elem_size; diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 6b12f06ee18c..9c12ee453c61 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -63,11 +63,8 @@ struct bpf_lru { static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) { - /* ref is an approximation on access frequency. It does not - * have to be very accurate. Hence, no protection is used. - */ - if (!node->ref) - node->ref = 1; + if (!READ_ONCE(node->ref)) + WRITE_ONCE(node->ref, 1); } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, From 2864cc9a1fd13666ed7fd9064dc3f2c51a85de32 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Thu, 15 Jun 2023 11:54:47 +0200 Subject: [PATCH 468/509] devlink: report devlink_port_type_warn source device [ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ] devlink_port_type_warn is scheduled for port devlink and warning when the port type is not set. But from this warning it is not easy found out which device (driver) has no devlink port set. [ 3709.975552] Type was not set for devlink port. [ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20 [ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm [ 3709.994030] crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse [ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1 [ 3710.108435] Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022 [ 3710.108437] Workqueue: events devlink_port_type_warn [ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20 [ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87 [ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282 [ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027 [ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8 [ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18 [ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600 [ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905 [ 3710.108452] FS: 0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000 [ 3710.108453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0 [ 3710.108456] PKRU: 55555554 [ 3710.108457] Call Trace: [ 3710.108458] [ 3710.108459] process_one_work+0x1e2/0x3b0 [ 3710.108466] ? rescuer_thread+0x390/0x390 [ 3710.108468] worker_thread+0x50/0x3a0 [ 3710.108471] ? rescuer_thread+0x390/0x390 [ 3710.108473] kthread+0xdd/0x100 [ 3710.108477] ? kthread_complete_and_exit+0x20/0x20 [ 3710.108479] ret_from_fork+0x1f/0x30 [ 3710.108485] [ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]--- After patch: [ 402.473064] ice 0000:41:00.0: Type was not set for devlink port. [ 402.473064] ice 0000:41:00.1: Type was not set for devlink port. Signed-off-by: Petr Oros Reviewed-by: Pavan Chebbi Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/devlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 72047750dcd9..00c6944ed634 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8092,7 +8092,10 @@ EXPORT_SYMBOL_GPL(devlink_free); static void devlink_port_type_warn(struct work_struct *work) { - WARN(true, "Type was not set for devlink port."); + struct devlink_port *port = container_of(to_delayed_work(work), + struct devlink_port, + type_warn_dw); + dev_warn(port->devlink->dev, "Type was not set for devlink port."); } static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) From 85cf0d5f45cb73c4b67b87152fe5481092ed3b4b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 15 Jun 2023 12:04:07 -0600 Subject: [PATCH 469/509] wifi: wext-core: Fix -Wstringop-overflow warning in ioctl_standard_iw_point() [ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ] -Wstringop-overflow is legitimately warning us about extra_size pontentially being zero at some point, hence potenially ending up _allocating_ zero bytes of memory for extra pointer and then trying to access such object in a call to copy_from_user(). Fix this by adding a sanity check to ensure we never end up trying to allocate zero bytes of data for extra pointer, before continue executing the rest of the code in the function. Address the following -Wstringop-overflow warning seen when built m68k architecture with allyesconfig configuration: from net/wireless/wext-core.c:11: In function '_copy_from_user', inlined from 'copy_from_user' at include/linux/uaccess.h:183:7, inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7: arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=] 48 | #define memset(d, c, n) __builtin_memset(d, c, n) | ^~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/uaccess.h:153:17: note: in expansion of macro 'memset' 153 | memset(to + (n - res), 0, res); | ^~~~~~ In function 'kmalloc', inlined from 'kzalloc' at include/linux/slab.h:694:9, inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10: include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc' 577 | return __kmalloc(size, flags); | ^~~~~~~~~~~~~~~~~~~~~~ This help with the ongoing efforts to globally enable -Wstringop-overflow. Link: https://github.com/KSPP/linux/issues/315 Signed-off-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/wext-core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 76a80a41615b..a57f54bc0e1a 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -796,6 +796,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } } + /* Sanity-check to ensure we never end up _allocating_ zero + * bytes of data for extra. + */ + if (extra_size <= 0) + return -EFAULT; + /* kzalloc() ensures NULL-termination for essid_compat. */ extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) From a956c3af70fad096654a06d9dceb4e5641e29f08 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 20 Jun 2023 13:04:02 +0300 Subject: [PATCH 470/509] wifi: iwlwifi: mvm: avoid baid size integer overflow [ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ] Roee reported various hard-to-debug crashes with pings in EHT aggregation scenarios. Enabling KASAN showed that we access the BAID allocation out of bounds, and looking at the code a bit shows that since the reorder buffer entry (struct iwl_mvm_reorder_buf_entry) is 128 bytes if debug such as lockdep is enabled, then staring from an agg size 512 we overflow the size calculation, and allocate a much smaller structure than we should, causing slab corruption once we initialize this. Fix this by simply using u32 instead of u16. Reported-by: Roee Goldfiner Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 09f870c48a4f..141581fa74c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2590,7 +2590,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } if (iwl_mvm_has_new_rx_api(mvm) && start) { - u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); + u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]); /* sparse doesn't like the __align() so don't check */ #ifndef __CHECKER__ From 994c2ceb70ea99264ccc6f09e6703ca267dad63c Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Tue, 20 Jun 2023 10:47:32 -0700 Subject: [PATCH 471/509] igb: Fix igb_down hung on surprise removal [ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ] In a setup where a Thunderbolt hub connects to Ethernet and a display through USB Type-C, users may experience a hung task timeout when they remove the cable between the PC and the Thunderbolt hub. This is because the igb_down function is called multiple times when the Thunderbolt hub is unplugged. For example, the igb_io_error_detected triggers the first call, and the igb_remove triggers the second call. The second call to igb_down will block at napi_synchronize. Here's the call trace: __schedule+0x3b0/0xddb ? __mod_timer+0x164/0x5d3 schedule+0x44/0xa8 schedule_timeout+0xb2/0x2a4 ? run_local_timers+0x4e/0x4e msleep+0x31/0x38 igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4] __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4] igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4] __dev_close_many+0x95/0xec dev_close_many+0x6e/0x103 unregister_netdevice_many+0x105/0x5b1 unregister_netdevice_queue+0xc2/0x10d unregister_netdev+0x1c/0x23 igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4] pci_device_remove+0x3f/0x9c device_release_driver_internal+0xfe/0x1b4 pci_stop_bus_device+0x5b/0x7f pci_stop_bus_device+0x30/0x7f pci_stop_bus_device+0x30/0x7f pci_stop_and_remove_bus_device+0x12/0x19 pciehp_unconfigure_device+0x76/0xe9 pciehp_disable_slot+0x6e/0x131 pciehp_handle_presence_or_link_change+0x7a/0x3f7 pciehp_ist+0xbe/0x194 irq_thread_fn+0x22/0x4d ? irq_thread+0x1fd/0x1fd irq_thread+0x17b/0x1fd ? irq_forced_thread_fn+0x5f/0x5f kthread+0x142/0x153 ? __irq_get_irqchip_state+0x46/0x46 ? kthread_associate_blkcg+0x71/0x71 ret_from_fork+0x1f/0x30 In this case, igb_io_error_detected detaches the network interface and requests a PCIE slot reset, however, the PCIE reset callback is not being invoked and thus the Ethernet connection breaks down. As the PCIE error in this case is a non-fatal one, requesting a slot reset can be avoided. This patch fixes the task hung issue and preserves Ethernet connection by ignoring non-fatal PCIE errors. Signed-off-by: Ying Hsu Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index c5f465814dec..446598210012 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9453,6 +9453,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + if (state == pci_channel_io_normal) { + dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); + return PCI_ERS_RESULT_CAN_RECOVER; + } + netif_device_detach(netdev); if (state == pci_channel_io_perm_failure) From 6e88cc510f2706462210abb6adce4bdf8521a6cf Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 29 Jun 2023 09:14:52 +0200 Subject: [PATCH 472/509] spi: bcm63xx: fix max prepend length [ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ] The command word is defined as following: /* Command */ #define SPI_CMD_COMMAND_SHIFT 0 #define SPI_CMD_DEVICE_ID_SHIFT 4 #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT 8 #define SPI_CMD_ONE_BYTE_SHIFT 11 #define SPI_CMD_ONE_WIRE_SHIFT 12 If the prepend byte count field starts at bit 8, and the next defined bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and thus the max value is 7, not 15. Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up") Signed-off-by: Jonas Gorski Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index 96d075e633f4..d36384fef0d7 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -126,7 +126,7 @@ enum bcm63xx_regs_spi { SPI_MSG_DATA_SIZE, }; -#define BCM63XX_SPI_MAX_PREPEND 15 +#define BCM63XX_SPI_MAX_PREPEND 7 #define BCM63XX_SPI_MAX_CS 8 #define BCM63XX_SPI_BUS_NUM 0 From 7041605e8594c41149e69222f8a42250cc42d2ef Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 28 Jun 2023 15:24:37 +0200 Subject: [PATCH 473/509] fbdev: imxfb: warn about invalid left/right margin [ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ] Warn about invalid var->left_margin or var->right_margin. Their values are read from the device tree. We store var->left_margin-3 and var->right_margin-1 in register fields. These fields should be >= 0. Fixes: 7e8549bcee00 ("imxfb: Fix margin settings") Signed-off-by: Martin Kaiser Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/imxfb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index 564bd0407ed8..d663e080b157 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -602,10 +602,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf if (var->hsync_len < 1 || var->hsync_len > 64) printk(KERN_ERR "%s: invalid hsync_len %d\n", info->fix.id, var->hsync_len); - if (var->left_margin > 255) + if (var->left_margin < 3 || var->left_margin > 255) printk(KERN_ERR "%s: invalid left_margin %d\n", info->fix.id, var->left_margin); - if (var->right_margin > 255) + if (var->right_margin < 1 || var->right_margin > 255) printk(KERN_ERR "%s: invalid right_margin %d\n", info->fix.id, var->right_margin); if (var->yres < 1 || var->yres > ymax_mask) From 54aa4c03861e125e77deb84358db4a8637af456c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:03 -0500 Subject: [PATCH 474/509] pinctrl: amd: Use amd_pinconf_set() for all config options [ Upstream commit 635a750d958e158e17af0f524bedc484b27fbb93 ] On ASUS TUF A16 it is reported that the ITE5570 ACPI device connected to GPIO 7 is causing an interrupt storm. This issue doesn't happen on Windows. Comparing the GPIO register configuration between Windows and Linux bit 20 has been configured as a pull up on Windows, but not on Linux. Checking GPIO declaration from the firmware it is clear it *should* have been a pull up on Linux as well. ``` GpioInt (Level, ActiveLow, Exclusive, PullUp, 0x0000, "\\_SB.GPIO", 0x00, ResourceConsumer, ,) { // Pin list 0x0007 } ``` On Linux amd_gpio_set_config() is currently only used for programming the debounce. Actually the GPIO core calls it with all the arguments that are supported by a GPIO, pinctrl-amd just responds `-ENOTSUPP`. To solve this issue expand amd_gpio_set_config() to support the other arguments amd_pinconf_set() supports, namely `PIN_CONFIG_BIAS_PULL_DOWN`, `PIN_CONFIG_BIAS_PULL_UP`, and `PIN_CONFIG_DRIVE_STRENGTH`. Reported-by: Nik P Reported-by: Nathan Schulte Reported-by: Friedrich Vock Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217336 Reported-by: dridri85@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217493 Link: https://lore.kernel.org/linux-input/20230530154058.17594-1-friedrich.vock@gmx.de/ Tested-by: Jan Visser Fixes: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips") Signed-off-by: Mario Limonciello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230705133005.577-3-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/pinctrl-amd.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 0d71151575ee..3a05ebb9aa25 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -189,18 +189,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, return ret; } -static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset, - unsigned long config) -{ - u32 debounce; - - if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE) - return -ENOTSUPP; - - debounce = pinconf_to_config_argument(config); - return amd_gpio_set_debounce(gc, offset, debounce); -} - #ifdef CONFIG_DEBUG_FS static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) { @@ -676,7 +664,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev, } static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, - unsigned long *configs, unsigned num_configs) + unsigned long *configs, unsigned int num_configs) { int i; u32 arg; @@ -766,6 +754,20 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev, return 0; } +static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin, + unsigned long config) +{ + struct amd_gpio *gpio_dev = gpiochip_get_data(gc); + + if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) { + u32 debounce = pinconf_to_config_argument(config); + + return amd_gpio_set_debounce(gc, pin, debounce); + } + + return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1); +} + static const struct pinconf_ops amd_pinconf_ops = { .pin_config_get = amd_pinconf_get, .pin_config_set = amd_pinconf_set, From f6d311b95394cdd0b661eeb7a2b395ef19ead9d8 Mon Sep 17 00:00:00 2001 From: Tanmay Patil Date: Wed, 12 Jul 2023 16:36:57 +0530 Subject: [PATCH 475/509] net: ethernet: ti: cpsw_ale: Fix cpsw_ale_get_field()/cpsw_ale_set_field() [ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ] CPSW ALE has 75 bit ALE entries which are stored within three 32 bit words. The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the field will be strictly contained within one word. However, this is not guaranteed to be the case and it is possible for ALE field entries to span across up to two words at the most. Fix the methods to handle getting/setting fields spanning up to two words. Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support") Signed-off-by: Tanmay Patil [s-vadapalli@ti.com: rephrased commit message and added Fixes tag] Signed-off-by: Siddharth Vadapalli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index a6a455c32628..73efc8b45364 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -104,23 +104,37 @@ struct cpsw_ale_dev_id { static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits) { - int idx; + int idx, idx2; + u32 hi_val = 0; idx = start / 32; + idx2 = (start + bits - 1) / 32; + /* Check if bits to be fetched exceed a word */ + if (idx != idx2) { + idx2 = 2 - idx2; /* flip */ + hi_val = ale_entry[idx2] << ((idx2 * 32) - start); + } start -= idx * 32; idx = 2 - idx; /* flip */ - return (ale_entry[idx] >> start) & BITMASK(bits); + return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits); } static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits, u32 value) { - int idx; + int idx, idx2; value &= BITMASK(bits); - idx = start / 32; + idx = start / 32; + idx2 = (start + bits - 1) / 32; + /* Check if bits to be set exceed a word */ + if (idx != idx2) { + idx2 = 2 - idx2; /* flip */ + ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32))); + ale_entry[idx2] |= (value >> ((idx2 * 32) - start)); + } start -= idx * 32; - idx = 2 - idx; /* flip */ + idx = 2 - idx; /* flip */ ale_entry[idx] &= ~(BITMASK(bits) << start); ale_entry[idx] |= (value << start); } From b37bc3b07eabe9853635b2accdcc9998a1bad0b1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 12 Jul 2023 08:44:49 -0700 Subject: [PATCH 476/509] bridge: Add extack warning when enabling STP in netns. [ Upstream commit 56a16035bb6effb37177867cea94c13a8382f745 ] When we create an L2 loop on a bridge in netns, we will see packets storm even if STP is enabled. # unshare -n # ip link add br0 type bridge # ip link add veth0 type veth peer name veth1 # ip link set veth0 master br0 up # ip link set veth1 master br0 up # ip link set br0 type bridge stp_state 1 # ip link set br0 up # sleep 30 # ip -s link show br0 2: br0: mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000 link/ether b6:61:98:1c:1c:b5 brd ff:ff:ff:ff:ff:ff RX: bytes packets errors dropped missed mcast 956553768 12861249 0 0 0 12861249 <-. Keep TX: bytes packets errors dropped carrier collsns | increasing 1027834 11951 0 0 0 0 <-' rapidly This is because llc_rcv() drops all packets in non-root netns and BPDU is dropped. Let's add extack warning when enabling STP in netns. # unshare -n # ip link add br0 type bridge # ip link set br0 type bridge stp_state 1 Warning: bridge: STP does not work in non-root netns. Note this commit will be reverted later when we namespacify the whole LLC infra. Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe") Suggested-by: Harry Coin Link: https://lore.kernel.org/netdev/0f531295-e289-022d-5add-5ceffa0df9bc@quietfountain.com/ Suggested-by: Ido Schimmel Signed-off-by: Kuniyuki Iwashima Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/bridge/br_stp_if.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index ba55851fe132..3326dfced68a 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -201,6 +201,9 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val, { ASSERT_RTNL(); + if (!net_eq(dev_net(br->dev), &init_net)) + NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns"); + if (br_mrp_enabled(br)) { NL_SET_ERR_MSG_MOD(extack, "STP can't be enabled if MRP is already enabled"); From a4635f190f332304db4a49e827ece790b804b5db Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Tue, 9 May 2023 19:11:47 +0800 Subject: [PATCH 477/509] iavf: Fix use-after-free in free_netdev [ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ] We do netif_napi_add() for all allocated q_vectors[], but potentially do netif_napi_del() for part of them, then kfree q_vectors and leave invalid pointers at dev->napi_list. Reproducer: [root@host ~]# cat repro.sh #!/bin/bash pf_dbsf="0000:41:00.0" vf0_dbsf="0000:41:02.0" g_pids=() function do_set_numvf() { echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) } function do_set_channel() { local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } ifconfig $nic 192.168.18.5 netmask 255.255.255.0 ifconfig $nic up ethtool -L $nic combined 1 ethtool -L $nic combined 4 sleep $((RANDOM%3)) } function on_exit() { local pid for pid in "${g_pids[@]}"; do kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null done g_pids=() } trap "on_exit; exit" EXIT while :; do do_set_numvf ; done & g_pids+=($!) while :; do do_set_channel ; done & g_pids+=($!) wait Result: [ 4093.900222] ================================================================== [ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390 [ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699 [ 4093.900233] [ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 [ 4093.900239] Call Trace: [ 4093.900244] dump_stack+0x71/0xab [ 4093.900249] print_address_description+0x6b/0x290 [ 4093.900251] ? free_netdev+0x308/0x390 [ 4093.900252] kasan_report+0x14a/0x2b0 [ 4093.900254] free_netdev+0x308/0x390 [ 4093.900261] iavf_remove+0x825/0xd20 [iavf] [ 4093.900265] pci_device_remove+0xa8/0x1f0 [ 4093.900268] device_release_driver_internal+0x1c6/0x460 [ 4093.900271] pci_stop_bus_device+0x101/0x150 [ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20 [ 4093.900275] pci_iov_remove_virtfn+0x187/0x420 [ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10 [ 4093.900278] ? pci_get_subsys+0x90/0x90 [ 4093.900280] sriov_disable+0xed/0x3e0 [ 4093.900282] ? bus_find_device+0x12d/0x1a0 [ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e] [ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e] [ 4093.900299] ? pci_get_device+0x7c/0x90 [ 4093.900300] ? pci_get_subsys+0x90/0x90 [ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210 [ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10 [ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 4093.900318] sriov_numvfs_store+0x214/0x290 [ 4093.900320] ? sriov_totalvfs_show+0x30/0x30 [ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10 [ 4093.900323] ? __check_object_size+0x15a/0x350 [ 4093.900326] kernfs_fop_write+0x280/0x3f0 [ 4093.900329] vfs_write+0x145/0x440 [ 4093.900330] ksys_write+0xab/0x160 [ 4093.900332] ? __ia32_sys_read+0xb0/0xb0 [ 4093.900334] ? fput_many+0x1a/0x120 [ 4093.900335] ? filp_close+0xf0/0x130 [ 4093.900338] do_syscall_64+0xa0/0x370 [ 4093.900339] ? page_fault+0x8/0x30 [ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 4093.900357] RIP: 0033:0x7f16ad4d22c0 [ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 [ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0 [ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001 [ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700 [ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 [ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001 [ 4093.900367] [ 4093.900368] Allocated by task 820: [ 4093.900371] kasan_kmalloc+0xa6/0xd0 [ 4093.900373] __kmalloc+0xfb/0x200 [ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf] [ 4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf] [ 4093.900382] process_one_work+0x56a/0x11f0 [ 4093.900383] worker_thread+0x8f/0xf40 [ 4093.900384] kthread+0x2a0/0x390 [ 4093.900385] ret_from_fork+0x1f/0x40 [ 4093.900387] 0xffffffffffffffff [ 4093.900387] [ 4093.900388] Freed by task 6699: [ 4093.900390] __kasan_slab_free+0x137/0x190 [ 4093.900391] kfree+0x8b/0x1b0 [ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf] [ 4093.900397] iavf_remove+0x35a/0xd20 [iavf] [ 4093.900399] pci_device_remove+0xa8/0x1f0 [ 4093.900400] device_release_driver_internal+0x1c6/0x460 [ 4093.900401] pci_stop_bus_device+0x101/0x150 [ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20 [ 4093.900403] pci_iov_remove_virtfn+0x187/0x420 [ 4093.900404] sriov_disable+0xed/0x3e0 [ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e] [ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 4093.900416] sriov_numvfs_store+0x214/0x290 [ 4093.900417] kernfs_fop_write+0x280/0x3f0 [ 4093.900418] vfs_write+0x145/0x440 [ 4093.900419] ksys_write+0xab/0x160 [ 4093.900420] do_syscall_64+0xa0/0x370 [ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 4093.900422] 0xffffffffffffffff [ 4093.900422] [ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200 which belongs to the cache kmalloc-8k of size 8192 [ 4093.900425] The buggy address is located 5184 bytes inside of 8192-byte region [ffff88b4dc144200, ffff88b4dc146200) [ 4093.900425] The buggy address belongs to the page: [ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0 [ 4093.900430] flags: 0x10000000008100(slab|head) [ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80 [ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000 [ 4093.900434] page dumped because: kasan: bad access detected [ 4093.900435] [ 4093.900435] Memory state around the buggy address: [ 4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900438] ^ [ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 4093.900440] ================================================================== Although the patch #2 (of 2) can avoid the issue triggered by this repro.sh, there still are other potential risks that if num_active_queues is changed to less than allocated q_vectors[] by unexpected, the mismatched netif_napi_add/del() can also cause UAF. Since we actually call netif_napi_add() for all allocated q_vectors unconditionally in iavf_alloc_q_vectors(), so we should fix it by letting netif_napi_del() match to netif_napi_add(). Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Ding Hui Cc: Donglin Peng Cc: Huang Cun Reviewed-by: Simon Horman Reviewed-by: Madhu Chittim Reviewed-by: Leon Romanovsky Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index e45f3a1a11f3..b64801bc216b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1377,19 +1377,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) static void iavf_free_q_vectors(struct iavf_adapter *adapter) { int q_idx, num_q_vectors; - int napi_vectors; if (!adapter->q_vectors) return; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); + netif_napi_del(&q_vector->napi); } kfree(adapter->q_vectors); adapter->q_vectors = NULL; From b92defe4e8ee86996c16417ad8c804cb4395fddd Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Tue, 9 May 2023 19:11:48 +0800 Subject: [PATCH 478/509] iavf: Fix out-of-bounds when setting channels on remove [ Upstream commit 7c4bced3caa749ce468b0c5de711c98476b23a52 ] If we set channels greater during iavf_remove(), and waiting reset done would be timeout, then returned with error but changed num_active_queues directly, that will lead to OOB like the following logs. Because the num_active_queues is greater than tx/rx_rings[] allocated actually. Reproducer: [root@host ~]# cat repro.sh #!/bin/bash pf_dbsf="0000:41:00.0" vf0_dbsf="0000:41:02.0" g_pids=() function do_set_numvf() { echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs sleep $((RANDOM%3+1)) } function do_set_channel() { local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/) [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; } ifconfig $nic 192.168.18.5 netmask 255.255.255.0 ifconfig $nic up ethtool -L $nic combined 1 ethtool -L $nic combined 4 sleep $((RANDOM%3)) } function on_exit() { local pid for pid in "${g_pids[@]}"; do kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null done g_pids=() } trap "on_exit; exit" EXIT while :; do do_set_numvf ; done & g_pids+=($!) while :; do do_set_channel ; done & g_pids+=($!) wait Result: [ 3506.152887] iavf 0000:41:02.0: Removing device [ 3510.400799] ================================================================== [ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536 [ 3510.400823] [ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021 [ 3510.400835] Call Trace: [ 3510.400851] dump_stack+0x71/0xab [ 3510.400860] print_address_description+0x6b/0x290 [ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400868] kasan_report+0x14a/0x2b0 [ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf] [ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf] [ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf] [ 3510.400891] ? wait_woken+0x1d0/0x1d0 [ 3510.400895] ? notifier_call_chain+0xc1/0x130 [ 3510.400903] pci_device_remove+0xa8/0x1f0 [ 3510.400910] device_release_driver_internal+0x1c6/0x460 [ 3510.400916] pci_stop_bus_device+0x101/0x150 [ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20 [ 3510.400924] pci_iov_remove_virtfn+0x187/0x420 [ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10 [ 3510.400929] ? pci_get_subsys+0x90/0x90 [ 3510.400932] sriov_disable+0xed/0x3e0 [ 3510.400936] ? bus_find_device+0x12d/0x1a0 [ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e] [ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e] [ 3510.400968] ? pci_get_device+0x7c/0x90 [ 3510.400970] ? pci_get_subsys+0x90/0x90 [ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210 [ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10 [ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e] [ 3510.401001] sriov_numvfs_store+0x214/0x290 [ 3510.401005] ? sriov_totalvfs_show+0x30/0x30 [ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10 [ 3510.401011] ? __check_object_size+0x15a/0x350 [ 3510.401018] kernfs_fop_write+0x280/0x3f0 [ 3510.401022] vfs_write+0x145/0x440 [ 3510.401025] ksys_write+0xab/0x160 [ 3510.401028] ? __ia32_sys_read+0xb0/0xb0 [ 3510.401031] ? fput_many+0x1a/0x120 [ 3510.401032] ? filp_close+0xf0/0x130 [ 3510.401038] do_syscall_64+0xa0/0x370 [ 3510.401041] ? page_fault+0x8/0x30 [ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 3510.401073] RIP: 0033:0x7f3a9bb842c0 [ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24 [ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0 [ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001 [ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700 [ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002 [ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001 [ 3510.401090] [ 3510.401093] Allocated by task 76795: [ 3510.401098] kasan_kmalloc+0xa6/0xd0 [ 3510.401099] __kmalloc+0xfb/0x200 [ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf] [ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf] [ 3510.401114] process_one_work+0x56a/0x11f0 [ 3510.401115] worker_thread+0x8f/0xf40 [ 3510.401117] kthread+0x2a0/0x390 [ 3510.401119] ret_from_fork+0x1f/0x40 [ 3510.401122] 0xffffffffffffffff [ 3510.401123] In timeout handling, we should keep the original num_active_queues and reset num_req_queues to 0. Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count") Signed-off-by: Ding Hui Cc: Donglin Peng Cc: Huang Cun Reviewed-by: Leon Romanovsky Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 4680a2fe6d3c..05cd70579c16 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -968,7 +968,7 @@ static int iavf_set_channels(struct net_device *netdev, } if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - adapter->num_active_queues = num_req; + adapter->num_req_queues = 0; return -EOPNOTSUPP; } From 5bc78ba88905fdd1f2fc4e848f0cdbb67fd7789b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Wed, 14 Jun 2023 10:18:25 +0800 Subject: [PATCH 479/509] security: keys: Modify mismatched function name [ Upstream commit 2a4152742025c5f21482e8cebc581702a0fa5b01 ] No functional modification involved. security/keys/trusted-keys/trusted_tpm2.c:203: warning: expecting prototype for tpm_buf_append_auth(). Prototype was for tpm2_buf_append_auth() instead. Fixes: 2e19e10131a0 ("KEYS: trusted: Move TPM2 trusted keys code") Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5524 Signed-off-by: Jiapeng Chong Reviewed-by: Paul Moore Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin --- security/keys/trusted-keys/trusted_tpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index 4c19d3abddbe..65f68856414a 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -21,7 +21,7 @@ static struct tpm2_hash tpm2_hash_map[] = { }; /** - * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer. + * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer. * * @buf: an allocated tpm_buf instance * @session_handle: session handle From 41b00238699a7ed473bdd9f632bcd9f1aad2c09c Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Sun, 16 Jul 2023 15:07:41 +0530 Subject: [PATCH 480/509] octeontx2-pf: Dont allocate BPIDs for LBK interfaces [ Upstream commit 8fcd7c7b3a38ab5e452f542fda8f7940e77e479a ] Current driver enables backpressure for LBK interfaces. But these interfaces do not support this feature. Hence, this patch fixes the issue by skipping the backpressure configuration for these interfaces. Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool"). Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Link: https://lore.kernel.org/r/20230716093741.28063-1-gakula@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 54aeb276b9a0..000dd89c4baf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1311,8 +1311,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) if (err) goto err_free_npa_lf; - /* Enable backpressure */ - otx2_nix_config_bp(pf, true); + /* Enable backpressure for CGX mapped PF/VFs */ + if (!is_otx2_lbkvf(pf->pdev)) + otx2_nix_config_bp(pf, true); /* Init Auras and pools used by NIX RQ, for free buffer ptrs */ err = otx2_rq_aura_pool_init(pf); From 937105d2b0bf024382de3f24eaf1966db7c9a0ff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Jul 2023 14:44:45 +0000 Subject: [PATCH 481/509] tcp: annotate data-races around tcp_rsk(req)->ts_recent [ Upstream commit eba20811f32652bc1a52d5e7cc403859b86390d9 ] TCP request sockets are lockless, tcp_rsk(req)->ts_recent can change while being read by another cpu as syzbot noticed. This is harmless, but we should annotate the known races. Note that tcp_check_req() changes req->ts_recent a bit early, we might change this in the future. BUG: KCSAN: data-race in tcp_check_req / tcp_check_req write to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 1: tcp_check_req+0x694/0xc70 net/ipv4/tcp_minisocks.c:762 tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071 ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:303 [inline] ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:468 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5493 [inline] __netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607 process_backlog+0x21f/0x380 net/core/dev.c:5935 __napi_poll+0x60/0x3b0 net/core/dev.c:6498 napi_poll net/core/dev.c:6565 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6698 __do_softirq+0xc1/0x265 kernel/softirq.c:571 do_softirq+0x7e/0xb0 kernel/softirq.c:472 __local_bh_enable_ip+0x64/0x70 kernel/softirq.c:396 local_bh_enable+0x1f/0x20 include/linux/bottom_half.h:33 rcu_read_unlock_bh include/linux/rcupdate.h:843 [inline] __dev_queue_xmit+0xabb/0x1d10 net/core/dev.c:4271 dev_queue_xmit include/linux/netdevice.h:3088 [inline] neigh_hh_output include/net/neighbour.h:528 [inline] neigh_output include/net/neighbour.h:542 [inline] ip_finish_output2+0x700/0x840 net/ipv4/ip_output.c:229 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:317 NF_HOOK_COND include/linux/netfilter.h:292 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:431 dst_output include/net/dst.h:458 [inline] ip_local_out net/ipv4/ip_output.c:126 [inline] __ip_queue_xmit+0xa4d/0xa70 net/ipv4/ip_output.c:533 ip_queue_xmit+0x38/0x40 net/ipv4/ip_output.c:547 __tcp_transmit_skb+0x1194/0x16e0 net/ipv4/tcp_output.c:1399 tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline] tcp_write_xmit+0x13ff/0x2fd0 net/ipv4/tcp_output.c:2693 __tcp_push_pending_frames+0x6a/0x1a0 net/ipv4/tcp_output.c:2877 tcp_push_pending_frames include/net/tcp.h:1952 [inline] __tcp_sock_set_cork net/ipv4/tcp.c:3336 [inline] tcp_sock_set_cork+0xe8/0x100 net/ipv4/tcp.c:3343 rds_tcp_xmit_path_complete+0x3b/0x40 net/rds/tcp_send.c:52 rds_send_xmit+0xf8d/0x1420 net/rds/send.c:422 rds_send_worker+0x42/0x1d0 net/rds/threads.c:200 process_one_work+0x3e6/0x750 kernel/workqueue.c:2408 worker_thread+0x5f2/0xa10 kernel/workqueue.c:2555 kthread+0x1d7/0x210 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 read to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 0: tcp_check_req+0x32a/0xc70 net/ipv4/tcp_minisocks.c:622 tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071 ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:303 [inline] ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:468 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:303 [inline] ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5493 [inline] __netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607 process_backlog+0x21f/0x380 net/core/dev.c:5935 __napi_poll+0x60/0x3b0 net/core/dev.c:6498 napi_poll net/core/dev.c:6565 [inline] net_rx_action+0x32b/0x750 net/core/dev.c:6698 __do_softirq+0xc1/0x265 kernel/softirq.c:571 run_ksoftirqd+0x17/0x20 kernel/softirq.c:939 smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164 kthread+0x1d7/0x210 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 value changed: 0x1cd237f1 -> 0x1cd237f2 Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: syzbot Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230717144445.653164-3-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 9 ++++++--- net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d62d5d7764ad..b40780fde791 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -960,7 +960,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, - req->ts_recent, + READ_ONCE(req->ts_recent), 0, tcp_md5_do_lookup(sk, l3index, addr, AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8d854feebdb0..01e27620b7ee 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -523,7 +523,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { - newtp->rx_opt.ts_recent = req->ts_recent; + newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { @@ -586,7 +586,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = req->ts_recent; + tmp_opt.ts_recent = READ_ONCE(req->ts_recent); if (tmp_opt.rcv_tsecr) tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off; /* We do not store true stamp, but it is not required, @@ -726,8 +726,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ + /* TODO: We probably should defer ts_recent change once + * we take ownership of @req. + */ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) - req->ts_recent = tmp_opt.rcv_tsval; + WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e4ad274ec7a3..86e896351364 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -874,7 +874,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; - opts->tsecr = req->ts_recent; + opts->tsecr = READ_ONCE(req->ts_recent); remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5392aebd48f1..79d6f6ea3c54 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1151,7 +1151,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, - req->ts_recent, sk->sk_bound_dev_if, + READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); } From 1a478ad1297a40675350d834871b8f6a4729eef6 Mon Sep 17 00:00:00 2001 From: Wang Ming Date: Mon, 17 Jul 2023 17:59:19 +0800 Subject: [PATCH 482/509] net: ipv4: Use kfree_sensitive instead of kfree [ Upstream commit daa751444fd9d4184270b1479d8af49aaf1a1ee6 ] key might contain private part of the key, so better use kfree_sensitive to free it. Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP") Signed-off-by: Wang Ming Reviewed-by: Tariq Toukan Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/esp4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 20d738137841..28252029bd79 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1134,7 +1134,7 @@ static int esp_init_authenc(struct xfrm_state *x) err = crypto_aead_setkey(aead, key, keylen); free_key: - kfree(key); + kfree_sensitive(key); error: return err; From d06fc7b39199e7d2930ff6de20e52fc6f0ee86f4 Mon Sep 17 00:00:00 2001 From: Yuanjun Gong Date: Mon, 17 Jul 2023 22:45:19 +0800 Subject: [PATCH 483/509] net:ipv6: check return value of pskb_trim() [ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ] goto tx_err if an unexpected result is returned by pskb_tirm() in ip6erspan_tunnel_xmit(). Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support") Signed-off-by: Yuanjun Gong Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/ip6_gre.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 7b50e1811678..2df1036330f8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -955,7 +955,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, goto tx_err; if (skb->len > dev->mtu + dev->hard_header_len) { - pskb_trim(skb, dev->mtu + dev->hard_header_len); + if (pskb_trim(skb, dev->mtu + dev->hard_header_len)) + goto tx_err; truncate = true; } From bc9d4d432f78e3f7b37602eb96c2956b54703220 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 17 Jul 2023 14:59:18 -0700 Subject: [PATCH 484/509] Revert "tcp: avoid the lookup process failing to get sk in ehash table" [ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ] This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043. Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table") reversed the order in how a socket is inserted into ehash to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are swapped. However, it introduced another lookup failure. The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU and does not have SOCK_RCU_FREE, so the socket could be reused even while it is being referenced on another CPU doing RCU lookup. Let's say a socket is reused and inserted into the same hash bucket during lookup. After the blamed commit, a new socket is inserted at the end of the list. If that happens, we will skip sockets placed after the previous position of the reused socket, resulting in ehash lookup failure. As described in Documentation/RCU/rculist_nulls.rst, we should insert a new socket at the head of the list to avoid such an issue. This issue, the swap-lookup-failure, and another variant reported in [0] can all be handled properly by adding a locked ehash lookup suggested by Eric Dumazet [1]. However, this issue could occur for every packet, thus more likely than the other two races, so let's revert the change for now. Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0] Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1] Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_hashtables.c | 17 ++--------------- net/ipv4/inet_timewait_sock.c | 8 ++++---- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 79bf550c9dfc..ad050f8476b8 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -571,20 +571,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); - ret = sk_hashed(osk); - if (ret) { - /* Before deleting the node, we insert a new one to make - * sure that the look-up-sk process would not miss either - * of them and that at least one node would exist in ehash - * table all the time. Otherwise there's a tiny chance - * that lookup process could find nothing in ehash table. - */ - __sk_nulls_add_node_tail_rcu(sk, list); - sk_nulls_del_node_init_rcu(osk); - } - goto unlock; - } - if (found_dup_sk) { + ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; @@ -593,7 +581,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ret) __sk_nulls_add_node_rcu(sk, list); -unlock: spin_unlock(lock); return ret; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a00102d7c7fd..c411c87ae865 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -81,10 +81,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw) } EXPORT_SYMBOL_GPL(inet_twsk_put); -static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw, - struct hlist_nulls_head *list) +static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_nulls_add_tail_rcu(&tw->tw_node, list); + hlist_nulls_add_head_rcu(&tw->tw_node, list); } static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -120,7 +120,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); - inet_twsk_add_node_tail_rcu(tw, &ehead->chain); + inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) From 49e435ca02c797e6e877bb79a9c3c3f580462fdb Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Sat, 15 Jul 2023 16:16:56 +0800 Subject: [PATCH 485/509] fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe [ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ] This func misses checking for platform_get_irq()'s call and may passes the negative error codes to request_irq(), which takes unsigned IRQ #, causing it to fail with -EINVAL, overriding an original error code. Fix this by stop calling request_irq() with invalid IRQ #s. Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ") Signed-off-by: Zhang Shurong Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/au1200fb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c index a8a0a448cdb5..80f54111baec 100644 --- a/drivers/video/fbdev/au1200fb.c +++ b/drivers/video/fbdev/au1200fb.c @@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev) /* Now hook interrupt too */ irq = platform_get_irq(dev, 0); + if (irq < 0) + return irq; + ret = request_irq(irq, au1200fb_handle_irq, IRQF_SHARED, "lcd", (void *)dev); if (ret) { From a6f1988780a7c9e5f54a1d6d363db99e850106fc Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 18 Jul 2023 10:41:51 -0700 Subject: [PATCH 486/509] llc: Don't drop packet from non-root netns. [ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ] Now these upper layer protocol handlers can be called from llc_rcv() as sap->rcv_func(), which is registered by llc_sap_open(). * function which is passed to register_8022_client() -> no in-kernel user calls register_8022_client(). * snap_rcv() `- proto->rcvfunc() : registered by register_snap_client() -> aarp_rcv() and atalk_rcv() drop packets from non-root netns * stp_pdu_rcv() `- garp_protos[]->rcv() : registered by stp_proto_register() -> garp_pdu_rcv() and br_stp_rcv() are netns-aware So, we can safely remove the netns restriction in llc_rcv(). Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/llc/llc_input.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index c309b72a5877..7cac441862e2 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, void (*sta_handler)(struct sk_buff *skb); void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb); - if (!net_eq(dev_net(dev), &init_net)) - goto drop; - /* * When the interface is in promisc. mode, drop all the crap that it * receives, do not try to analyse it. From 533193a23914558127190e7bf7cb505ab6389acd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Jul 2023 00:29:58 +0200 Subject: [PATCH 487/509] netfilter: nf_tables: fix spurious set element insertion failure [ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ] On some platforms there is a padding hole in the nft_verdict structure, between the verdict code and the chain pointer. On element insertion, if the new element clashes with an existing one and NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as the data associated with duplicated element is the same as the existing one. The data equality check uses memcmp. For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT padding area leads to spurious failure even if the verdict data is the same. This then makes the insertion fail with 'already exists' error, even though the new "key : data" matches an existing entry and userspace told the kernel that it doesn't want to receive an error indication. Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion") Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d56f5d7fa545..9c3a9e3f1ede 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8914,6 +8914,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; + + /* zero padding hole for memcmp */ + memset(data, 0, sizeof(*data)); data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { From 9c2df17e3cfc3e7674de20d3acf249c768400674 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Jul 2023 01:30:33 +0200 Subject: [PATCH 488/509] netfilter: nf_tables: can't schedule in nft_chain_validate [ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ] Can be called via nft set element list iteration, which may acquire rcu and/or bh read lock (depends on set type). BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353 in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft preempt_count: 0, expected: 0 RCU nest depth: 1, expected: 0 2 locks held by nft/1232: #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire Call Trace: nft_chain_validate nft_lookup_validate_setelem nft_pipapo_walk nft_lookup_validate nft_chain_validate nft_immediate_validate nft_chain_validate nf_tables_validate nf_tables_abort No choice but to move it to nf_tables_validate(). Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks") Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9c3a9e3f1ede..a8d316a58e44 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3276,8 +3276,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) if (err < 0) return err; } - - cond_resched(); } return 0; @@ -3301,6 +3299,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) err = nft_chain_validate(&ctx, chain); if (err < 0) return err; + + cond_resched(); } return 0; From 3a91099ecd59a42d1632fcb152bf7222f268ea2b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 19 Jul 2023 21:08:21 +0200 Subject: [PATCH 489/509] netfilter: nft_set_pipapo: fix improper element removal [ Upstream commit 87b5a5c209405cb6b57424cdfa226a6dbd349232 ] end key should be equal to start unless NFT_SET_EXT_KEY_END is present. Its possible to add elements that only have a start key ("{ 1.0.0.0 . 2.0.0.0 }") without an internval end. Insertion treats this via: if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) end = (const u8 *)nft_set_ext_key_end(ext)->data; else end = start; but removal side always uses nft_set_ext_key_end(). This is wrong and leads to garbage remaining in the set after removal next lookup/insert attempt will give: BUG: KASAN: slab-use-after-free in pipapo_get+0x8eb/0xb90 Read of size 1 at addr ffff888100d50586 by task nft-pipapo_uaf_/1399 Call Trace: kasan_report+0x105/0x140 pipapo_get+0x8eb/0xb90 nft_pipapo_insert+0x1dc/0x1710 nf_tables_newsetelem+0x31f5/0x4e00 .. Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Reported-by: lonial con Reviewed-by: Stefano Brivio Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nft_set_pipapo.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7c759e9b4d84..3be93175b3ff 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1904,7 +1904,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, int i, start, rules_fx; match_start = data; - match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data; + + if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END)) + match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data; + else + match_end = data; start = first_rule; rules_fx = rules_f0; From 94c10c0fa51b78ae39af7f093c8d8db1fc39477e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 19 Jul 2023 20:19:43 +0200 Subject: [PATCH 490/509] netfilter: nf_tables: skip bound chain in netns release path [ Upstream commit 751d460ccff3137212f47d876221534bf0490996 ] Skip bound chain from netns release path, the rule that owns this chain releases these objects. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a8d316a58e44..40ed4dd530c5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -9185,6 +9185,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table) ctx.family = table->family; ctx.table = table; list_for_each_entry(chain, &table->chains, list) { + if (nft_chain_is_bound(chain)) + continue; + ctx.chain = chain; list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); From 30e5460d69e631c0e84db37dba2d8f98648778d4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Jul 2023 09:17:21 +0200 Subject: [PATCH 491/509] netfilter: nf_tables: skip bound chain on rule flush [ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ] Skip bound chain when flushing table rules, the rule that owns this chain releases these objects. Otherwise, the following warning is triggered: WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1 RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 40ed4dd530c5..356416564d9f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3611,6 +3611,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; + if (nft_chain_is_bound(chain)) + continue; ctx.chain = chain; err = nft_delrule_by_chain(&ctx); From 1d4f2c4be1365afdb1411884d71bf3b04a2c9b1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:47 +0000 Subject: [PATCH 492/509] tcp: annotate data-races around tp->tcp_tx_delay [ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ] do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu might change its value. Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index cc42ceadc112..39919d1436ce 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3447,7 +3447,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_TX_DELAY: if (val) tcp_enable_tx_delay(); - tp->tcp_tx_delay = val; + WRITE_ONCE(tp->tcp_tx_delay, val); break; default: err = -ENOPROTOOPT; @@ -3902,7 +3902,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TX_DELAY: - val = tp->tcp_tx_delay; + val = READ_ONCE(tp->tcp_tx_delay); break; case TCP_TIMESTAMP: From 7b52a78a91fd3145909d5cc7ec3c727061e4ad21 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:49 +0000 Subject: [PATCH 493/509] tcp: annotate data-races around tp->keepalive_time [ Upstream commit 4164245c76ff906c9086758e1c3f87082a7f5ef5 ] do_tcp_getsockopt() reads tp->keepalive_time while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-4-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 7 +++++-- net/ipv4/tcp.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index e231101e5001..92de7c049f19 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1459,9 +1459,12 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp) static inline int keepalive_time_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_time ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */ + val = READ_ONCE(tp->keepalive_time); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); } static inline int keepalive_probes(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 39919d1436ce..053e4880d8f0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3093,7 +3093,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val) if (val < 1 || val > MAX_TCP_KEEPIDLE) return -EINVAL; - tp->keepalive_time = val * HZ; + /* Paired with WRITE_ONCE() in keepalive_time_when() */ + WRITE_ONCE(tp->keepalive_time, val * HZ); if (sock_flag(sk, SOCK_KEEPOPEN) && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { u32 elapsed = keepalive_time_elapsed(tp); From 93715448f1162a54e86f93c18c6b4bb8a86bf4d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:50 +0000 Subject: [PATCH 494/509] tcp: annotate data-races around tp->keepalive_intvl [ Upstream commit 5ecf9d4f52ff2f1d4d44c9b68bc75688e82f13b4 ] do_tcp_getsockopt() reads tp->keepalive_intvl while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-5-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 9 +++++++-- net/ipv4/tcp.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 92de7c049f19..428f84f6e0d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1451,9 +1451,14 @@ void tcp_leave_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_intvl ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_intvl); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl); } static inline int keepalive_time_when(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 053e4880d8f0..b5a05b098414 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3126,7 +3126,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - tcp_sk(sk)->keepalive_intvl = val * HZ; + WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); release_sock(sk); return 0; } @@ -3324,7 +3324,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 1 || val > MAX_TCP_KEEPINTVL) err = -EINVAL; else - tp->keepalive_intvl = val * HZ; + WRITE_ONCE(tp->keepalive_intvl, val * HZ); break; case TCP_KEEPCNT: if (val < 1 || val > MAX_TCP_KEEPCNT) From a5c30a51876470499b068f752a89621f52a0d5e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:51 +0000 Subject: [PATCH 495/509] tcp: annotate data-races around tp->keepalive_probes [ Upstream commit 6e5e1de616bf5f3df1769abc9292191dfad9110a ] do_tcp_getsockopt() reads tp->keepalive_probes while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-6-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 9 +++++++-- net/ipv4/tcp.c | 5 +++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 428f84f6e0d0..be81a930b91f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1475,9 +1475,14 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) static inline int keepalive_probes(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); + int val; - return tp->keepalive_probes ? : - READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); + /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt() + * and do_tcp_setsockopt(). + */ + val = READ_ONCE(tp->keepalive_probes); + + return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes); } static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b5a05b098414..80212bb0400c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3138,7 +3138,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - tcp_sk(sk)->keepalive_probes = val; + /* Paired with READ_ONCE() in keepalive_probes() */ + WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); release_sock(sk); return 0; } @@ -3330,7 +3331,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 1 || val > MAX_TCP_KEEPCNT) err = -EINVAL; else - tp->keepalive_probes = val; + WRITE_ONCE(tp->keepalive_probes, val); break; case TCP_SYNCNT: if (val < 1 || val > MAX_TCP_SYNCNT) From cf6c06ac74879726c88fe1ec013727006cdbdd58 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:14 +0900 Subject: [PATCH 496/509] net: Introduce net.ipv4.tcp_migrate_req. [ Upstream commit f9ac779f881c2ec3d1cdcd7fa9d4f9442bf60e80 ] This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or eBPF program is attached, we will be able to migrate child sockets from a listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Benjamin Herrenschmidt Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp Stable-dep-of: 3a037f0f3c4b ("tcp: annotate data-races around icsk->icsk_syn_retries") Signed-off-by: Sasha Levin --- Documentation/networking/ip-sysctl.rst | 25 +++++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index df26cf4110ef..252212998378 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -713,6 +713,31 @@ tcp_syncookies - INTEGER network connections you can set this knob to 2 to enable unconditionally generation of syncookies. +tcp_migrate_req - BOOLEAN + The incoming connection is tied to a specific listening socket when + the initial SYN packet is received during the three-way handshake. + When a listener is closed, in-flight request sockets during the + handshake and established sockets in the accept queue are aborted. + + If the listener has SO_REUSEPORT enabled, other listeners on the + same port should have been able to accept such connections. This + option makes it possible to migrate such child sockets to another + listener after close() or shutdown(). + + The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should + usually be used to define the policy to pick an alive listener. + Otherwise, the kernel will randomly pick an alive listener only if + this option is enabled. + + Note that migration between listeners with different settings may + crash applications. Let's say migration happens from listener A to + B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from + the requests migrated from A. To avoid such a situation, cancel + migration by returning SK_DROP in the type of eBPF program, or + disable this option. + + Default: 0 + tcp_fastopen - INTEGER Enable TCP Fast Open (RFC7413) to send and accept data in the opening SYN packet. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 4a4a5270ff6f..9b0d8649ae5b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -131,6 +131,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syn_retries; u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; + u8 sysctl_tcp_migrate_req; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5aa8bde3e9c8..59ba518a85b9 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -878,6 +878,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, }, #endif + { + .procname = "tcp_migrate_req", + .data = &init_net.ipv4.sysctl_tcp_migrate_req, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, { .procname = "tcp_reordering", .data = &init_net.ipv4.sysctl_tcp_reordering, From 7b0084918c5f7da21dbb91ec24da7a554edd3209 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 15 Jul 2022 10:17:46 -0700 Subject: [PATCH 497/509] tcp: Fix data-races around sysctl_tcp_syn(ack)?_retries. [ Upstream commit 20a3b1c0f603e8c55c3396abd12dfcfb523e4d3c ] While reading sysctl_tcp_syn(ack)?_retries, they can be changed concurrently. Thus, we need to add READ_ONCE() to their readers. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Stable-dep-of: 3a037f0f3c4b ("tcp: annotate data-races around icsk->icsk_syn_retries") Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 3 ++- net/ipv4/tcp.c | 3 ++- net/ipv4/tcp_timer.c | 10 +++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 406305aaec90..dfea3088bc7e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -740,7 +740,8 @@ static void reqsk_timer_handler(struct timer_list *t) if (inet_sk_state_load(sk_listener) != TCP_LISTEN) goto drop; - max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; + max_syn_ack_retries = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. * If synack was not acknowledged for 1 second, it means diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 80212bb0400c..fc4d560909b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3743,7 +3743,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: val = tp->linger2; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 888683f2ff3e..715fdfa3e2ae 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -239,7 +239,8 @@ static int tcp_write_timeout(struct sock *sk) if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) __dst_negative_advice(sk); - retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); expired = icsk->icsk_retransmits >= retry_until; } else { if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) { @@ -406,12 +407,15 @@ abort: tcp_write_err(sk); static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) { struct inet_connection_sock *icsk = inet_csk(sk); - int max_retries = icsk->icsk_syn_retries ? : - sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ struct tcp_sock *tp = tcp_sk(sk); + int max_retries; req->rsk_ops->syn_ack_timeout(req); + /* add one more retry for fastopen */ + max_retries = icsk->icsk_syn_retries ? : + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1; + if (req->num_timeout >= max_retries) { tcp_write_err(sk); return; From 9168bd8f54c53020d642836abd6a20e6a682bce6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:52 +0000 Subject: [PATCH 498/509] tcp: annotate data-races around icsk->icsk_syn_retries [ Upstream commit 3a037f0f3c4bfe44518f2fbb478aa2f99a9cd8bb ] do_tcp_getsockopt() and reqsk_timer_handler() read icsk->icsk_syn_retries while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-7-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/tcp.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index dfea3088bc7e..5f71a1c74e7e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -740,7 +740,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (inet_sk_state_load(sk_listener) != TCP_LISTEN) goto drop; - max_syn_ack_retries = icsk->icsk_syn_retries ? : + max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_synack_retries); /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fc4d560909b5..e172348fc5c6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3072,7 +3072,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val) return -EINVAL; lock_sock(sk); - inet_csk(sk)->icsk_syn_retries = val; + WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val); release_sock(sk); return 0; } @@ -3337,7 +3337,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else - icsk->icsk_syn_retries = val; + WRITE_ONCE(icsk->icsk_syn_retries, val); break; case TCP_SAVE_SYN: @@ -3743,7 +3743,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : + val = READ_ONCE(icsk->icsk_syn_retries) ? : READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: From f891375eba6e24c3a077c07d13e418ba681e0dac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:53 +0000 Subject: [PATCH 499/509] tcp: annotate data-races around tp->linger2 [ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ] do_tcp_getsockopt() reads tp->linger2 while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e172348fc5c6..f7c951463d9c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3350,11 +3350,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_LINGER2: if (val < 0) - tp->linger2 = -1; + WRITE_ONCE(tp->linger2, -1); else if (val > TCP_FIN_TIMEOUT_MAX / HZ) - tp->linger2 = TCP_FIN_TIMEOUT_MAX; + WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); else - tp->linger2 = val * HZ; + WRITE_ONCE(tp->linger2, val * HZ); break; case TCP_DEFER_ACCEPT: @@ -3747,7 +3747,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); break; case TCP_LINGER2: - val = tp->linger2; + val = READ_ONCE(tp->linger2); if (val >= 0) val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; From 9c786d5faf3ad271138c14655bfc9f642d6c0c92 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:54 +0000 Subject: [PATCH 500/509] tcp: annotate data-races around rskq_defer_accept [ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ] do_tcp_getsockopt() reads rskq_defer_accept while another cpu might change its value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f7c951463d9c..50d674d35e52 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3359,9 +3359,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_DEFER_ACCEPT: /* Translate value in seconds to number of retransmits */ - icsk->icsk_accept_queue.rskq_defer_accept = - secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, - TCP_RTO_MAX / HZ); + WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ)); break; case TCP_WINDOW_CLAMP: @@ -3752,8 +3752,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; break; case TCP_DEFER_ACCEPT: - val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, - TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); + val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); + val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; From 3cf0a0f11d39e183000df4ce090409181834b677 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:55 +0000 Subject: [PATCH 501/509] tcp: annotate data-races around tp->notsent_lowat [ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ] tp->notsent_lowat can be read locklessly from do_tcp_getsockopt() and tcp_poll(). Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/tcp.h | 6 +++++- net/ipv4/tcp.c | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index be81a930b91f..dcca41f3a224 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1991,7 +1991,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { struct net *net = sock_net((struct sock *)tp); - return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); + u32 val; + + val = READ_ONCE(tp->notsent_lowat); + + return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat); } /* @wake is one when sk_stream_write_space() calls us. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 50d674d35e52..8a441dfd258d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3437,7 +3437,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, err = tcp_repair_set_window(tp, optval, optlen); break; case TCP_NOTSENT_LOWAT: - tp->notsent_lowat = val; + WRITE_ONCE(tp->notsent_lowat, val); sk->sk_write_space(sk); break; case TCP_INQ: @@ -3913,7 +3913,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tcp_time_stamp_raw() + tp->tsoffset; break; case TCP_NOTSENT_LOWAT: - val = tp->notsent_lowat; + val = READ_ONCE(tp->notsent_lowat); break; case TCP_INQ: val = tp->recvmsg_inq; From d01afbfc2f7df4fc943edd5414589cdc1bffb4bd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:56 +0000 Subject: [PATCH 502/509] tcp: annotate data-races around icsk->icsk_user_timeout [ Upstream commit 26023e91e12c68669db416b97234328a03d8e499 ] This field can be read locklessly from do_tcp_getsockopt() Fixes: dca43c75e7e5 ("tcp: Add TCP_USER_TIMEOUT socket option.") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-11-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8a441dfd258d..0a5f61b3423b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3081,7 +3081,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt); void tcp_sock_set_user_timeout(struct sock *sk, u32 val) { lock_sock(sk); - inet_csk(sk)->icsk_user_timeout = val; + WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); release_sock(sk); } EXPORT_SYMBOL(tcp_sock_set_user_timeout); @@ -3393,7 +3393,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, if (val < 0) err = -EINVAL; else - icsk->icsk_user_timeout = val; + WRITE_ONCE(icsk->icsk_user_timeout, val); break; case TCP_FASTOPEN: @@ -3890,7 +3890,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_USER_TIMEOUT: - val = icsk->icsk_user_timeout; + val = READ_ONCE(icsk->icsk_user_timeout); break; case TCP_FASTOPEN: From e0ac63e194f49a5b5e7b79b8017635b3c49f1994 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jul 2023 21:28:57 +0000 Subject: [PATCH 503/509] tcp: annotate data-races around fastopenq.max_qlen [ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ] This field can be read locklessly. Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/tcp.h | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6e3340379d85..11a98144bda0 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -473,7 +473,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); - queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); + WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn)); } static inline void tcp_move_syn(struct tcp_sock *tp, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0a5f61b3423b..3dd9b76f4055 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3894,7 +3894,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - val = icsk->icsk_accept_queue.fastopenq.max_qlen; + val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); break; case TCP_FASTOPEN_CONNECT: diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 39fb037ce5f3..92d63cf3e50b 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -312,6 +312,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, static bool tcp_fastopen_queue_check(struct sock *sk) { struct fastopen_queue *fastopenq; + int max_qlen; /* Make sure the listener has enabled fastopen, and we don't * exceed the max # of pending TFO requests allowed before trying @@ -324,10 +325,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * temporarily vs a server not supporting Fast Open at all. */ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; - if (fastopenq->max_qlen == 0) + max_qlen = READ_ONCE(fastopenq->max_qlen); + if (max_qlen == 0) return false; - if (fastopenq->qlen >= fastopenq->max_qlen) { + if (fastopenq->qlen >= max_qlen) { struct request_sock *req1; spin_lock(&fastopenq->lock); req1 = fastopenq->rskq_rst_head; From e3da59f428200bf109fda878e95d1023647a5bd7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 20 Jul 2023 03:02:31 +0300 Subject: [PATCH 504/509] net: phy: prevent stale pointer dereference in phy_init() [ Upstream commit 1c613beaf877c0c0d755853dc62687e2013e55c4 ] mdio_bus_init() and phy_driver_register() both have error paths, and if those are ever hit, ethtool will have a stale pointer to the phy_ethtool_phy_ops stub structure, which references memory from a module that failed to load (phylib). It is probably hard to force an error in this code path even manually, but the error teardown path of phy_init() should be the same as phy_exit(), which is now simply not the case. Fixes: 55d8f053ce1b ("net: phy: Register ethtool PHY operations") Link: https://lore.kernel.org/netdev/ZLaiJ4G6TaJYGJyU@shell.armlinux.org.uk/ Suggested-by: Russell King (Oracle) Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20230720000231.1939689-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e771e0e8a9bc..095d16ceafcf 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3024,23 +3024,30 @@ static int __init phy_init(void) { int rc; + ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); + rc = mdio_bus_init(); if (rc) - return rc; + goto err_ethtool_phy_ops; - ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops); features_init(); rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE); if (rc) - goto err_c45; + goto err_mdio_bus; rc = phy_driver_register(&genphy_driver, THIS_MODULE); - if (rc) { - phy_driver_unregister(&genphy_c45_driver); + if (rc) + goto err_c45; + + return 0; + err_c45: - mdio_bus_exit(); - } + phy_driver_unregister(&genphy_c45_driver); +err_mdio_bus: + mdio_bus_exit(); +err_ethtool_phy_ops: + ethtool_set_ethtool_phy_ops(NULL); return rc; } From 43e786aa51b80e5856c8f1c4ad46a5784364ebe6 Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Fri, 14 Jul 2023 20:33:41 +0000 Subject: [PATCH 505/509] tracing/histograms: Return an error if we fail to add histogram to hist_vars list commit 4b8b3905165ef98386a3c06f196c85d21292d029 upstream. Commit 6018b585e8c6 ("tracing/histograms: Add histograms to hist_vars if they have referenced variables") added a check to fail histogram creation if save_hist_vars() failed to add histogram to hist_vars list. But the commit failed to set ret to failed return code before jumping to unregister histogram, fix it. Link: https://lore.kernel.org/linux-trace-kernel/20230714203341.51396-1-mkhalfella@purestorage.com Cc: stable@vger.kernel.org Fixes: 6018b585e8c6 ("tracing/histograms: Add histograms to hist_vars if they have referenced variables") Signed-off-by: Mohamed Khalfella Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ae71cefb46bb..059a106e62be 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5822,7 +5822,8 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, goto out_unreg; if (has_hist_vars(hist_data) || hist_data->n_var_refs) { - if (save_hist_vars(hist_data)) + ret = save_hist_vars(hist_data); + if (ret) goto out_unreg; } From 1a1e793e021d75cd0accd8f329ec9456e5cd105e Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 13 Jul 2023 22:14:35 +0800 Subject: [PATCH 506/509] tracing: Fix memory leak of iter->temp when reading trace_pipe commit d5a821896360cc8b93a15bd888fabc858c038dc0 upstream. kmemleak reports: unreferenced object 0xffff88814d14e200 (size 256): comm "cat", pid 336, jiffies 4294871818 (age 779.490s) hex dump (first 32 bytes): 04 00 01 03 00 00 00 00 08 00 00 00 00 00 00 00 ................ 0c d8 c8 9b ff ff ff ff 04 5a ca 9b ff ff ff ff .........Z...... backtrace: [] __kmalloc+0x4f/0x140 [] trace_find_next_entry+0xbb/0x1d0 [] trace_print_lat_context+0xaf/0x4e0 [] print_trace_line+0x3e0/0x950 [] tracing_read_pipe+0x2d9/0x5a0 [] vfs_read+0x143/0x520 [] ksys_read+0xbd/0x160 [] do_syscall_64+0x3f/0x90 [] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 when reading file 'trace_pipe', 'iter->temp' is allocated or relocated in trace_find_next_entry() but not freed before 'trace_pipe' is closed. To fix it, free 'iter->temp' in tracing_release_pipe(). Link: https://lore.kernel.org/linux-trace-kernel/20230713141435.1133021-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: ff895103a84ab ("tracing: Save off entry when peeking at next entry") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) [Fix conflict due to lack of 649e72070cbbb8600eb823833e4748f5a0815116] Signed-off-by: Zheng Yejian Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4dd8c16f023b..7867fc39c4fc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6250,6 +6250,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); + kfree(iter->temp); mutex_destroy(&iter->mutex); kfree(iter); From 115b19f8935339b69635fae7e1530b3bff262e40 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Apr 2021 16:14:17 -0400 Subject: [PATCH 507/509] ftrace: Store the order of pages allocated in ftrace_page commit db42523b4f3e83ff86b53cdda219a9767c8b047f upstream. Instead of saving the size of the records field of the ftrace_page, store the order it uses to allocate the pages, as that is what is needed to know in order to free the pages. This simplifies the code. Link: https://lore.kernel.org/lkml/CAHk-=whyMxheOqXAORt9a7JK9gc9eHTgCJ55Pgs4p=X3RrQubQ@mail.gmail.com/ Signed-off-by: Linus Torvalds [ change log written by Steven Rostedt ] Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Zheng Yejian Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3dab978c156d..acba76c0c5c3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1091,7 +1091,7 @@ struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; int index; - int size; + int order; }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) @@ -3188,7 +3188,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) ftrace_number_of_groups++; cnt = (PAGE_SIZE << order) / ENTRY_SIZE; - pg->size = cnt; + pg->order = order; if (cnt > count) cnt = count; @@ -3201,7 +3201,6 @@ ftrace_allocate_pages(unsigned long num_to_init) { struct ftrace_page *start_pg; struct ftrace_page *pg; - int order; int cnt; if (!num_to_init) @@ -3237,13 +3236,13 @@ ftrace_allocate_pages(unsigned long num_to_init) free_pages: pg = start_pg; while (pg) { - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } start_pg = pg->next; kfree(pg); pg = start_pg; - ftrace_number_of_pages -= 1 << order; ftrace_number_of_groups--; } pr_info("ftrace: FAILED to allocate memory for functions\n"); @@ -6239,6 +6238,7 @@ static int ftrace_process_locs(struct module *mod, p = start; pg = start_pg; while (p < end) { + unsigned long end_offset; addr = ftrace_call_adjust(*p++); /* * Some architecture linkers will pad between @@ -6249,7 +6249,8 @@ static int ftrace_process_locs(struct module *mod, if (!addr) continue; - if (pg->index == pg->size) { + end_offset = (pg->index+1) * sizeof(pg->records[0]); + if (end_offset > PAGE_SIZE << pg->order) { /* We should have allocated enough */ if (WARN_ON(!pg->next)) break; @@ -6418,7 +6419,6 @@ void ftrace_release_mod(struct module *mod) struct ftrace_page **last_pg; struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; - int order; mutex_lock(&ftrace_lock); @@ -6469,12 +6469,12 @@ void ftrace_release_mod(struct module *mod) /* Needs to be called outside of ftrace_lock */ clear_mod_from_hashes(pg); - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } tmp_page = pg->next; kfree(pg); - ftrace_number_of_pages -= 1 << order; ftrace_number_of_groups--; } } @@ -6792,7 +6792,6 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) struct ftrace_mod_map *mod_map = NULL; struct ftrace_init_func *func, *func_next; struct list_head clear_hash; - int order; INIT_LIST_HEAD(&clear_hash); @@ -6830,10 +6829,10 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - order = get_count_order(pg->size / ENTRIES_PER_PAGE); - if (order >= 0) - free_pages((unsigned long)pg->records, order); - ftrace_number_of_pages -= 1 << order; + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } ftrace_number_of_groups--; kfree(pg); pg = container_of(last_pg, struct ftrace_page, next); From edce5fba78ccf7ddf8a74906339225383fe54811 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 12 Jul 2023 14:04:52 +0800 Subject: [PATCH 508/509] ftrace: Fix possible warning on checking all pages used in ftrace_process_locs() commit 26efd79c4624294e553aeaa3439c646729bad084 upstream. As comments in ftrace_process_locs(), there may be NULL pointers in mcount_loc section: > Some architecture linkers will pad between > the different mcount_loc sections of different > object files to satisfy alignments. > Skip any NULL pointers. After commit 20e5227e9f55 ("ftrace: allow NULL pointers in mcount_loc"), NULL pointers will be accounted when allocating ftrace pages but skipped before adding into ftrace pages, this may result in some pages not being used. Then after commit 706c81f87f84 ("ftrace: Remove extra helper functions"), warning may occur at: WARN_ON(pg->next); To fix it, only warn for case that no pointers skipped but pages not used up, then free those unused pages after releasing ftrace_lock. Link: https://lore.kernel.org/linux-trace-kernel/20230712060452.3175675-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 706c81f87f84 ("ftrace: Remove extra helper functions") Suggested-by: Steven Rostedt Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index acba76c0c5c3..31fec924b7c4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3196,6 +3196,22 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) return cnt; } +static void ftrace_free_pages(struct ftrace_page *pages) +{ + struct ftrace_page *pg = pages; + + while (pg) { + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } + pages = pg->next; + kfree(pg); + pg = pages; + ftrace_number_of_groups--; + } +} + static struct ftrace_page * ftrace_allocate_pages(unsigned long num_to_init) { @@ -3234,17 +3250,7 @@ ftrace_allocate_pages(unsigned long num_to_init) return start_pg; free_pages: - pg = start_pg; - while (pg) { - if (pg->records) { - free_pages((unsigned long)pg->records, pg->order); - ftrace_number_of_pages -= 1 << pg->order; - } - start_pg = pg->next; - kfree(pg); - pg = start_pg; - ftrace_number_of_groups--; - } + ftrace_free_pages(start_pg); pr_info("ftrace: FAILED to allocate memory for functions\n"); return NULL; } @@ -6190,9 +6196,11 @@ static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { + struct ftrace_page *pg_unuse = NULL; struct ftrace_page *start_pg; struct ftrace_page *pg; struct dyn_ftrace *rec; + unsigned long skipped = 0; unsigned long count; unsigned long *p; unsigned long addr; @@ -6246,8 +6254,10 @@ static int ftrace_process_locs(struct module *mod, * object files to satisfy alignments. * Skip any NULL pointers. */ - if (!addr) + if (!addr) { + skipped++; continue; + } end_offset = (pg->index+1) * sizeof(pg->records[0]); if (end_offset > PAGE_SIZE << pg->order) { @@ -6261,8 +6271,10 @@ static int ftrace_process_locs(struct module *mod, rec->ip = addr; } - /* We should have used all pages */ - WARN_ON(pg->next); + if (pg->next) { + pg_unuse = pg->next; + pg->next = NULL; + } /* Assign the last page to ftrace_pages */ ftrace_pages = pg; @@ -6284,6 +6296,11 @@ static int ftrace_process_locs(struct module *mod, out: mutex_unlock(&ftrace_lock); + /* We should have used all pages unless we skipped some */ + if (pg_unuse) { + WARN_ON(!skipped); + ftrace_free_pages(pg_unuse); + } return ret; } From 3602dbc57b556eff2456715301d35a1ef8964bba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Jul 2023 08:44:44 +0200 Subject: [PATCH 509/509] Linux 5.10.188 Link: https://lore.kernel.org/r/20230725104553.588743331@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Shuah Khan Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20230726045328.327600022@linuxfoundation.org Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2aaf3b0b9250..7f9e1667d87d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 187 +SUBLEVEL = 188 EXTRAVERSION = NAME = Dare mighty things