From 71777492b745837481630c751111eeb19bb589cf Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 12 Apr 2021 17:55:12 +0800 Subject: [PATCH 01/39] vhost-vdpa: protect concurrent access to vhost device iotlb commit a9d064524fc3cf463b3bb14fa63de78aafb40dab upstream. Protect vhost device iotlb by vhost_dev->mutex. Otherwise, it might cause corruption of the list and interval tree in struct vhost_iotlb if userspace sends the VHOST_IOTLB_MSG_V2 message concurrently. Fixes: 4c8cf318("vhost: introduce vDPA-based backend") Cc: stable@vger.kernel.org Signed-off-by: Xie Yongji Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210412095512.178-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vdpa.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index fc5707ada024..84e5949bc861 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -749,9 +749,11 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, const struct vdpa_config_ops *ops = vdpa->config; int r = 0; + mutex_lock(&dev->mutex); + r = vhost_dev_check_owner(dev); if (r) - return r; + goto unlock; switch (msg->type) { case VHOST_IOTLB_UPDATE: @@ -772,6 +774,8 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, r = -EINVAL; break; } +unlock: + mutex_unlock(&dev->mutex); return r; } From 9857fccd653c0d820d45be5baea64ab731f4557e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Sat, 17 Apr 2021 11:38:39 +0300 Subject: [PATCH 02/39] gpio: omap: Save and restore sysconfig [ Upstream commit ddd8d94ca31e768c76cf8bfe34ba7b10136b3694 ] As we are using cpu_pm to save and restore context, we must also save and restore the GPIO sysconfig register. This is needed because we are not calling PM runtime functions at all with cpu_pm. We need to save the sysconfig on idle as it's value can get reconfigured by PM runtime and can be different from the init time value. Device specific flags like "ti,no-idle-on-init" can affect the init value. Fixes: b764a5863fd8 ("gpio: omap: Remove custom PM calls and use cpu_pm instead") Cc: Aaro Koskinen Cc: Adam Ford Cc: Andreas Kemnade Cc: Grygorii Strashko Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Acked-by: Grygorii Strashko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-omap.c | 9 +++++++++ include/linux/platform_data/gpio-omap.h | 3 +++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index f7ceb2b11afc..a7e8ed5191a8 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -29,6 +29,7 @@ #define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF struct gpio_regs { + u32 sysconfig; u32 irqenable1; u32 irqenable2; u32 wake_en; @@ -1072,6 +1073,7 @@ static void omap_gpio_init_context(struct gpio_bank *p) const struct omap_gpio_reg_offs *regs = p->regs; void __iomem *base = p->base; + p->context.sysconfig = readl_relaxed(base + regs->sysconfig); p->context.ctrl = readl_relaxed(base + regs->ctrl); p->context.oe = readl_relaxed(base + regs->direction); p->context.wake_en = readl_relaxed(base + regs->wkup_en); @@ -1091,6 +1093,7 @@ static void omap_gpio_restore_context(struct gpio_bank *bank) const struct omap_gpio_reg_offs *regs = bank->regs; void __iomem *base = bank->base; + writel_relaxed(bank->context.sysconfig, base + regs->sysconfig); writel_relaxed(bank->context.wake_en, base + regs->wkup_en); writel_relaxed(bank->context.ctrl, base + regs->ctrl); writel_relaxed(bank->context.leveldetect0, base + regs->leveldetect0); @@ -1118,6 +1121,10 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) bank->saved_datain = readl_relaxed(base + bank->regs->datain); + /* Save syconfig, it's runtime value can be different from init value */ + if (bank->loses_context) + bank->context.sysconfig = readl_relaxed(base + bank->regs->sysconfig); + if (!bank->enabled_non_wakeup_gpios) goto update_gpio_context_count; @@ -1282,6 +1289,7 @@ static int gpio_omap_cpu_notifier(struct notifier_block *nb, static const struct omap_gpio_reg_offs omap2_gpio_regs = { .revision = OMAP24XX_GPIO_REVISION, + .sysconfig = OMAP24XX_GPIO_SYSCONFIG, .direction = OMAP24XX_GPIO_OE, .datain = OMAP24XX_GPIO_DATAIN, .dataout = OMAP24XX_GPIO_DATAOUT, @@ -1305,6 +1313,7 @@ static const struct omap_gpio_reg_offs omap2_gpio_regs = { static const struct omap_gpio_reg_offs omap4_gpio_regs = { .revision = OMAP4_GPIO_REVISION, + .sysconfig = OMAP4_GPIO_SYSCONFIG, .direction = OMAP4_GPIO_OE, .datain = OMAP4_GPIO_DATAIN, .dataout = OMAP4_GPIO_DATAOUT, diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h index 8b30b14b47d3..f377817ce75c 100644 --- a/include/linux/platform_data/gpio-omap.h +++ b/include/linux/platform_data/gpio-omap.h @@ -85,6 +85,7 @@ * omap2+ specific GPIO registers */ #define OMAP24XX_GPIO_REVISION 0x0000 +#define OMAP24XX_GPIO_SYSCONFIG 0x0010 #define OMAP24XX_GPIO_IRQSTATUS1 0x0018 #define OMAP24XX_GPIO_IRQSTATUS2 0x0028 #define OMAP24XX_GPIO_IRQENABLE2 0x002c @@ -108,6 +109,7 @@ #define OMAP24XX_GPIO_SETDATAOUT 0x0094 #define OMAP4_GPIO_REVISION 0x0000 +#define OMAP4_GPIO_SYSCONFIG 0x0010 #define OMAP4_GPIO_EOI 0x0020 #define OMAP4_GPIO_IRQSTATUSRAW0 0x0024 #define OMAP4_GPIO_IRQSTATUSRAW1 0x0028 @@ -148,6 +150,7 @@ #ifndef __ASSEMBLER__ struct omap_gpio_reg_offs { u16 revision; + u16 sysconfig; u16 direction; u16 datain; u16 dataout; From bf84ef2dd2ccdcd8f2658476d34b51455f970ce4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 21 Apr 2021 15:42:47 -0700 Subject: [PATCH 03/39] KEYS: trusted: Fix TPM reservation for seal/unseal [ Upstream commit 9d5171eab462a63e2fbebfccf6026e92be018f20 ] The original patch 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") was correct on the mailing list: https://lore.kernel.org/linux-integrity/20210128235621.127925-4-jarkko@kernel.org/ But somehow got rebased so that the tpm_try_get_ops() in tpm2_seal_trusted() got lost. This causes an imbalanced put of the TPM ops and causes oopses on TIS based hardware. This fix puts back the lost tpm_try_get_ops() Fixes: 8c657a0590de ("KEYS: trusted: Reserve TPM for seal and unseal operations") Reported-by: Mimi Zohar Acked-by: Mimi Zohar Signed-off-by: James Bottomley Signed-off-by: Sasha Levin --- security/keys/trusted-keys/trusted_tpm2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index e2a0ed5d02f0..c87c4df8703d 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -79,7 +79,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip, if (i == ARRAY_SIZE(tpm2_hash_map)) return -EINVAL; - rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + rc = tpm_try_get_ops(chip); if (rc) return rc; From a8cd07e4400d66c3304a38c5796a41c10ad76743 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 11 Apr 2021 11:36:46 +0300 Subject: [PATCH 04/39] vdpa/mlx5: Set err = -ENOMEM in case dma_map_sg_attrs fails [ Upstream commit be286f84e33da1a7f83142b64dbd86f600e73363 ] Set err = -ENOMEM if dma_map_sg_attrs() fails so the function reutrns error. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Signed-off-by: Eli Cohen Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/20210411083646.910546-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/core/mr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index d300f799efcd..aa656f57bf5b 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -273,8 +273,10 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr mr->log_size = log_entity_size; mr->nsg = nsg; mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!mr->nent) + if (!mr->nent) { + err = -ENOMEM; goto err_map; + } err = create_direct_mr(mvdev, mr); if (err) From 2bbd8aafde362e2d2bb3af3ce2ea400c3860073f Mon Sep 17 00:00:00 2001 From: Yuanyuan Zhong Date: Mon, 12 Apr 2021 17:17:59 -0600 Subject: [PATCH 05/39] pinctrl: lewisburg: Update number of pins in community [ Upstream commit 196d941753297d0ca73c563ccd7d00be049ec226 ] When updating pin names for Intel Lewisburg, the numbers of pins were left behind. Update them accordingly. Fixes: e66ff71fd0db ("pinctrl: lewisburg: Update pin list according to v1.1v6") Signed-off-by: Yuanyuan Zhong Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin --- drivers/pinctrl/intel/pinctrl-lewisburg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-lewisburg.c b/drivers/pinctrl/intel/pinctrl-lewisburg.c index 7fdf4257df1e..ad4b446d588e 100644 --- a/drivers/pinctrl/intel/pinctrl-lewisburg.c +++ b/drivers/pinctrl/intel/pinctrl-lewisburg.c @@ -299,9 +299,9 @@ static const struct pinctrl_pin_desc lbg_pins[] = { static const struct intel_community lbg_communities[] = { LBG_COMMUNITY(0, 0, 71), LBG_COMMUNITY(1, 72, 132), - LBG_COMMUNITY(3, 133, 144), - LBG_COMMUNITY(4, 145, 180), - LBG_COMMUNITY(5, 181, 246), + LBG_COMMUNITY(3, 133, 143), + LBG_COMMUNITY(4, 144, 178), + LBG_COMMUNITY(5, 179, 246), }; static const struct intel_pinctrl_soc_data lbg_soc_data = { From fc2454cc0c4bbf3ab7556c8b38e042c6c7651e42 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Apr 2021 18:05:02 +0200 Subject: [PATCH 06/39] block: return -EBUSY when there are open partitions in blkdev_reread_part [ Upstream commit 68e6582e8f2dc32fd2458b9926564faa1fb4560e ] The switch to go through blkdev_get_by_dev means we now ignore the return value from bdev_disk_changed in __blkdev_get. Add a manual check to restore the old semantics. Fixes: 4601b4b130de ("block: reopen the device in blkdev_reread_part") Reported-by: Karel Zak Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210421160502.447418-1-hch@lst.de Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/ioctl.c b/block/ioctl.c index 3be4d0e2a96c..ed240e170e14 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -98,6 +98,8 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; + if (bdev->bd_part_count) + return -EBUSY; /* * Reopen the device to revalidate the driver state and force a From 83d93d05376a807e0fccc60788193ced671fbf40 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Apr 2021 16:03:56 +0300 Subject: [PATCH 07/39] pinctrl: core: Show pin numbers for the controllers with base = 0 [ Upstream commit 482715ff0601c836152b792f06c353464d826b9b ] The commit f1b206cf7c57 ("pinctrl: core: print gpio in pins debugfs file") enabled GPIO pin number and label in debugfs for pin controller. However, it limited that feature to the chips where base is positive number. This, in particular, excluded chips where base is 0 for the historical or backward compatibility reasons. Refactor the code to include the latter as well. Fixes: f1b206cf7c57 ("pinctrl: core: print gpio in pins debugfs file") Cc: Drew Fustini Signed-off-by: Andy Shevchenko Tested-by: Drew Fustini Reviewed-by: Drew Fustini Link: https://lore.kernel.org/r/20210415130356.15885-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/pinctrl/core.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 9fc4433fece4..20b477cd5a30 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -1604,8 +1604,8 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) unsigned i, pin; #ifdef CONFIG_GPIOLIB struct pinctrl_gpio_range *range; - unsigned int gpio_num; struct gpio_chip *chip; + int gpio_num; #endif seq_printf(s, "registered pins: %d\n", pctldev->desc->npins); @@ -1625,7 +1625,7 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) seq_printf(s, "pin %d (%s) ", pin, desc->name); #ifdef CONFIG_GPIOLIB - gpio_num = 0; + gpio_num = -1; list_for_each_entry(range, &pctldev->gpio_ranges, node) { if ((pin >= range->pin_base) && (pin < (range->pin_base + range->npins))) { @@ -1633,10 +1633,12 @@ static int pinctrl_pins_show(struct seq_file *s, void *what) break; } } - chip = gpio_to_chip(gpio_num); - if (chip && chip->gpiodev && chip->gpiodev->base) - seq_printf(s, "%u:%s ", gpio_num - - chip->gpiodev->base, chip->label); + if (gpio_num >= 0) + chip = gpio_to_chip(gpio_num); + else + chip = NULL; + if (chip) + seq_printf(s, "%u:%s ", gpio_num - chip->gpiodev->base, chip->label); else seq_puts(s, "0:? "); #endif From edc5d16013895b42ac9fb67542d99b9689c11ac2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 14 Apr 2021 11:47:40 +0100 Subject: [PATCH 08/39] arm64: dts: allwinner: Revert SD card CD GPIO for Pine64-LTS [ Upstream commit 4d09ccc4a81e7de6b002482af554d8b5626f5041 ] Commit 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") enabled the card detect GPIO for the SOPine module, along the way with the Pine64-LTS, which share the same base .dtsi. This was based on the observation that the Pine64-LTS has as "push-push" SD card socket, and that the schematic mentions the card detect GPIO. After having received two reports about failing SD card access with that patch, some more research and polls on that subject revealed that there are at least two different versions of the Pine64-LTS out there: - On some boards (including mine) the card detect pin is "stuck" at high, regardless of an microSD card being inserted or not. - On other boards the card-detect is working, but is active-high, by virtue of an explicit inverter circuit, as shown in the schematic. To cover all versions of the board out there, and don't take any chances, let's revert the introduction of the active-low CD GPIO, but let's use the broken-cd property for the Pine64-LTS this time. That should avoid regressions and should work for everyone, even allowing SD card changes now. The SOPine card detect has proven to be working, so let's keep that GPIO in place. Fixes: 941432d00768 ("arm64: dts: allwinner: Drop non-removable from SoPine/LTS SD card") Reported-by: Michael Weiser Reported-by: Daniel Kulesz Suggested-by: Chen-Yu Tsai Signed-off-by: Andre Przywara Tested-by: Michael Weiser Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210414104740.31497-1-andre.przywara@arm.com Signed-off-by: Sasha Levin --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts index a1f621b388fe..358df6d926af 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts @@ -10,5 +10,5 @@ / { }; &mmc0 { - cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 push-push switch */ + broken-cd; /* card detect is broken on *some* boards */ }; From f79efcb0075a20633cbf9b47759f2c0d538f78d8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 9 Dec 2020 17:33:49 -0800 Subject: [PATCH 09/39] bpf: Permits pointers on stack for helper calls [ Upstream commit cd17d38f8b28f808c368121041c0a4fa91757e0d ] Currently, when checking stack memory accessed by helper calls, for spills, only PTR_TO_BTF_ID and SCALAR_VALUE are allowed. Song discovered an issue where the below bpf program int dump_task(struct bpf_iter__task *ctx) { struct seq_file *seq = ctx->meta->seq; static char[] info = "abc"; BPF_SEQ_PRINTF(seq, "%s\n", info); return 0; } may cause a verifier failure. The verifier output looks like: ; struct seq_file *seq = ctx->meta->seq; 1: (79) r1 = *(u64 *)(r1 +0) ; BPF_SEQ_PRINTF(seq, "%s\n", info); 2: (18) r2 = 0xffff9054400f6000 4: (7b) *(u64 *)(r10 -8) = r2 5: (bf) r4 = r10 ; 6: (07) r4 += -8 ; BPF_SEQ_PRINTF(seq, "%s\n", info); 7: (18) r2 = 0xffff9054400fe000 9: (b4) w3 = 4 10: (b4) w5 = 8 11: (85) call bpf_seq_printf#126 R1_w=ptr_seq_file(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=4,imm=0) R3_w=inv4 R4_w=fp-8 R5_w=inv8 R10=fp0 fp-8_w=map_value last_idx 11 first_idx 0 regs=8 stack=0 before 10: (b4) w5 = 8 regs=8 stack=0 before 9: (b4) w3 = 4 invalid indirect read from stack off -8+0 size 8 Basically, the verifier complains the map_value pointer at "fp-8" location. To fix the issue, if env->allow_ptr_leaks is true, let us also permit pointers on the stack to be accessible by the helper. Reported-by: Song Liu Suggested-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201210013349.943719-1-yhs@fb.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3370f0d476e9..2e09e691a6be 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3759,7 +3759,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, goto mark; if (state->stack[spi].slot_type[0] == STACK_SPILL && - state->stack[spi].spilled_ptr.type == SCALAR_VALUE) { + (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || + env->allow_ptr_leaks)) { __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); for (j = 0; j < BPF_REG_SIZE; j++) state->stack[spi].slot_type[j] = STACK_MISC; From f3c4b01689d392373301e6e60d1b02c5b4020afc Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Sat, 6 Feb 2021 20:10:24 -0500 Subject: [PATCH 10/39] bpf: Allow variable-offset stack access [ Upstream commit 01f810ace9ed37255f27608a0864abebccf0aab3 ] Before this patch, variable offset access to the stack was dissalowed for regular instructions, but was allowed for "indirect" accesses (i.e. helpers). This patch removes the restriction, allowing reading and writing to the stack through stack pointers with variable offsets. This makes stack-allocated buffers more usable in programs, and brings stack pointers closer to other types of pointers. The motivation is being able to use stack-allocated buffers for data manipulation. When the stack size limit is sufficient, allocating buffers on the stack is simpler than per-cpu arrays, or other alternatives. In unpriviledged programs, variable-offset reads and writes are disallowed (they were already disallowed for the indirect access case) because the speculative execution checking code doesn't support them. Additionally, when writing through a variable-offset stack pointer, if any pointers are in the accessible range, there's possilibities of later leaking pointers because the write cannot be tracked precisely. Writes with variable offset mark the whole range as initialized, even though we don't know which stack slots are actually written. This is in order to not reject future reads to these slots. Note that this doesn't affect writes done through helpers; like before, helpers need the whole stack range to be initialized to begin with. All the stack slots are in range are considered scalars after the write; variable-offset register spills are not tracked. For reads, all the stack slots in the variable range needs to be initialized (but see above about what writes do), otherwise the read is rejected. All register spilled in stack slots that might be read are marked as having been read, however reads through such pointers don't do register filling; the target register will always be either a scalar or a constant zero. Signed-off-by: Andrei Matei Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210207011027.676572-2-andreimatei1@gmail.com Signed-off-by: Sasha Levin --- include/linux/bpf.h | 5 + include/linux/bpf_verifier.h | 3 +- kernel/bpf/verifier.c | 659 +++++++++++++++++++++++++++-------- 3 files changed, 519 insertions(+), 148 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b416bba3a62b..8ad819132dde 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1259,6 +1259,11 @@ static inline bool bpf_allow_ptr_leaks(void) return perfmon_capable(); } +static inline bool bpf_allow_uninit_stack(void) +{ + return perfmon_capable(); +} + static inline bool bpf_allow_ptr_to_map_access(void) { return perfmon_capable(); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e83ef6f6bf43..85bac3191e12 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -187,7 +187,7 @@ struct bpf_func_state { * 0 = main function, 1 = first callee. */ u32 frameno; - /* subprog number == index within subprog_stack_depth + /* subprog number == index within subprog_info * zero == main subprog */ u32 subprogno; @@ -390,6 +390,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; + bool allow_uninit_stack; bool allow_ptr_to_map_access; bool bpf_capable; bool bypass_spec_v1; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e09e691a6be..94923c2bdd81 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2268,12 +2268,14 @@ static void save_register_state(struct bpf_func_state *state, state->stack[spi].slot_type[i] = STACK_SPILL; } -/* check_stack_read/write functions track spill/fill of registers, +/* check_stack_{read,write}_fixed_off functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct bpf_verifier_env *env, - struct bpf_func_state *state, /* func where register points to */ - int off, int size, int value_regno, int insn_idx) +static int check_stack_write_fixed_off(struct bpf_verifier_env *env, + /* stack frame we're writing to */ + struct bpf_func_state *state, + int off, int size, int value_regno, + int insn_idx) { struct bpf_func_state *cur; /* state of the current function */ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; @@ -2399,9 +2401,175 @@ static int check_stack_write(struct bpf_verifier_env *env, return 0; } -static int check_stack_read(struct bpf_verifier_env *env, - struct bpf_func_state *reg_state /* func where register points to */, - int off, int size, int value_regno) +/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is + * known to contain a variable offset. + * This function checks whether the write is permitted and conservatively + * tracks the effects of the write, considering that each stack slot in the + * dynamic range is potentially written to. + * + * 'off' includes 'regno->off'. + * 'value_regno' can be -1, meaning that an unknown value is being written to + * the stack. + * + * Spilled pointers in range are not marked as written because we don't know + * what's going to be actually written. This means that read propagation for + * future reads cannot be terminated by this write. + * + * For privileged programs, uninitialized stack slots are considered + * initialized by this write (even though we don't know exactly what offsets + * are going to be written to). The idea is that we don't want the verifier to + * reject future reads that access slots written to through variable offsets. + */ +static int check_stack_write_var_off(struct bpf_verifier_env *env, + /* func where register points to */ + struct bpf_func_state *state, + int ptr_regno, int off, int size, + int value_regno, int insn_idx) +{ + struct bpf_func_state *cur; /* state of the current function */ + int min_off, max_off; + int i, err; + struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL; + bool writing_zero = false; + /* set if the fact that we're writing a zero is used to let any + * stack slots remain STACK_ZERO + */ + bool zero_used = false; + + cur = env->cur_state->frame[env->cur_state->curframe]; + ptr_reg = &cur->regs[ptr_regno]; + min_off = ptr_reg->smin_value + off; + max_off = ptr_reg->smax_value + off + size; + if (value_regno >= 0) + value_reg = &cur->regs[value_regno]; + if (value_reg && register_is_null(value_reg)) + writing_zero = true; + + err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), + state->acquired_refs, true); + if (err) + return err; + + + /* Variable offset writes destroy any spilled pointers in range. */ + for (i = min_off; i < max_off; i++) { + u8 new_type, *stype; + int slot, spi; + + slot = -i - 1; + spi = slot / BPF_REG_SIZE; + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; + + if (!env->allow_ptr_leaks + && *stype != NOT_INIT + && *stype != SCALAR_VALUE) { + /* Reject the write if there's are spilled pointers in + * range. If we didn't reject here, the ptr status + * would be erased below (even though not all slots are + * actually overwritten), possibly opening the door to + * leaks. + */ + verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", + insn_idx, i); + return -EINVAL; + } + + /* Erase all spilled pointers. */ + state->stack[spi].spilled_ptr.type = NOT_INIT; + + /* Update the slot type. */ + new_type = STACK_MISC; + if (writing_zero && *stype == STACK_ZERO) { + new_type = STACK_ZERO; + zero_used = true; + } + /* If the slot is STACK_INVALID, we check whether it's OK to + * pretend that it will be initialized by this write. The slot + * might not actually be written to, and so if we mark it as + * initialized future reads might leak uninitialized memory. + * For privileged programs, we will accept such reads to slots + * that may or may not be written because, if we're reject + * them, the error would be too confusing. + */ + if (*stype == STACK_INVALID && !env->allow_uninit_stack) { + verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", + insn_idx, i); + return -EINVAL; + } + *stype = new_type; + } + if (zero_used) { + /* backtracking doesn't work for STACK_ZERO yet. */ + err = mark_chain_precision(env, value_regno); + if (err) + return err; + } + return 0; +} + +/* When register 'dst_regno' is assigned some values from stack[min_off, + * max_off), we set the register's type according to the types of the + * respective stack slots. If all the stack values are known to be zeros, then + * so is the destination reg. Otherwise, the register is considered to be + * SCALAR. This function does not deal with register filling; the caller must + * ensure that all spilled registers in the stack range have been marked as + * read. + */ +static void mark_reg_stack_read(struct bpf_verifier_env *env, + /* func where src register points to */ + struct bpf_func_state *ptr_state, + int min_off, int max_off, int dst_regno) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + int i, slot, spi; + u8 *stype; + int zeros = 0; + + for (i = min_off; i < max_off; i++) { + slot = -i - 1; + spi = slot / BPF_REG_SIZE; + stype = ptr_state->stack[spi].slot_type; + if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) + break; + zeros++; + } + if (zeros == max_off - min_off) { + /* any access_size read into register is zero extended, + * so the whole register == const_zero + */ + __mark_reg_const_zero(&state->regs[dst_regno]); + /* backtracking doesn't support STACK_ZERO yet, + * so mark it precise here, so that later + * backtracking can stop here. + * Backtracking may not need this if this register + * doesn't participate in pointer adjustment. + * Forward propagation of precise flag is not + * necessary either. This mark is only to stop + * backtracking. Any register that contributed + * to const 0 was marked precise before spill. + */ + state->regs[dst_regno].precise = true; + } else { + /* have read misc data from the stack */ + mark_reg_unknown(env, state->regs, dst_regno); + } + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; +} + +/* Read the stack at 'off' and put the results into the register indicated by + * 'dst_regno'. It handles reg filling if the addressed stack slot is a + * spilled reg. + * + * 'dst_regno' can be -1, meaning that the read value is not going to a + * register. + * + * The access is assumed to be within the current stack bounds. + */ +static int check_stack_read_fixed_off(struct bpf_verifier_env *env, + /* func where src register points to */ + struct bpf_func_state *reg_state, + int off, int size, int dst_regno) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; @@ -2409,11 +2577,6 @@ static int check_stack_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg; u8 *stype; - if (reg_state->allocated_stack <= slot) { - verbose(env, "invalid read from stack off %d+0 size %d\n", - off, size); - return -EACCES; - } stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; @@ -2424,9 +2587,9 @@ static int check_stack_read(struct bpf_verifier_env *env, verbose(env, "invalid size of register fill\n"); return -EACCES; } - if (value_regno >= 0) { - mark_reg_unknown(env, state->regs, value_regno); - state->regs[value_regno].live |= REG_LIVE_WRITTEN; + if (dst_regno >= 0) { + mark_reg_unknown(env, state->regs, dst_regno); + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); return 0; @@ -2438,16 +2601,16 @@ static int check_stack_read(struct bpf_verifier_env *env, } } - if (value_regno >= 0) { + if (dst_regno >= 0) { /* restore register state from stack */ - state->regs[value_regno] = *reg; + state->regs[dst_regno] = *reg; /* mark reg as written since spilled pointer state likely * has its liveness marks cleared by is_state_visited() * which resets stack/reg liveness for state transitions */ - state->regs[value_regno].live |= REG_LIVE_WRITTEN; + state->regs[dst_regno].live |= REG_LIVE_WRITTEN; } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { - /* If value_regno==-1, the caller is asking us whether + /* If dst_regno==-1, the caller is asking us whether * it is acceptable to use this value as a SCALAR_VALUE * (e.g. for XADD). * We must not allow unprivileged callers to do that @@ -2459,70 +2622,167 @@ static int check_stack_read(struct bpf_verifier_env *env, } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); } else { - int zeros = 0; + u8 type; for (i = 0; i < size; i++) { - if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) + type = stype[(slot - i) % BPF_REG_SIZE]; + if (type == STACK_MISC) continue; - if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { - zeros++; + if (type == STACK_ZERO) continue; - } verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); - if (value_regno >= 0) { - if (zeros == size) { - /* any size read into register is zero extended, - * so the whole register == const_zero - */ - __mark_reg_const_zero(&state->regs[value_regno]); - /* backtracking doesn't support STACK_ZERO yet, - * so mark it precise here, so that later - * backtracking can stop here. - * Backtracking may not need this if this register - * doesn't participate in pointer adjustment. - * Forward propagation of precise flag is not - * necessary either. This mark is only to stop - * backtracking. Any register that contributed - * to const 0 was marked precise before spill. - */ - state->regs[value_regno].precise = true; - } else { - /* have read misc data from the stack */ - mark_reg_unknown(env, state->regs, value_regno); - } - state->regs[value_regno].live |= REG_LIVE_WRITTEN; - } + if (dst_regno >= 0) + mark_reg_stack_read(env, reg_state, off, off + size, dst_regno); } return 0; } -static int check_stack_access(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - int off, int size) +enum stack_access_src { + ACCESS_DIRECT = 1, /* the access is performed by an instruction */ + ACCESS_HELPER = 2, /* the access is performed by a helper */ +}; + +static int check_stack_range_initialized(struct bpf_verifier_env *env, + int regno, int off, int access_size, + bool zero_size_allowed, + enum stack_access_src type, + struct bpf_call_arg_meta *meta); + +static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) { - /* Stack accesses must be at a fixed offset, so that we - * can determine what type of data were returned. See - * check_stack_read(). + return cur_regs(env) + regno; +} + +/* Read the stack at 'ptr_regno + off' and put the result into the register + * 'dst_regno'. + * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'), + * but not its variable offset. + * 'size' is assumed to be <= reg size and the access is assumed to be aligned. + * + * As opposed to check_stack_read_fixed_off, this function doesn't deal with + * filling registers (i.e. reads of spilled register cannot be detected when + * the offset is not fixed). We conservatively mark 'dst_regno' as containing + * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable + * offset; for a fixed offset check_stack_read_fixed_off should be used + * instead. + */ +static int check_stack_read_var_off(struct bpf_verifier_env *env, + int ptr_regno, int off, int size, int dst_regno) +{ + /* The state of the source register. */ + struct bpf_reg_state *reg = reg_state(env, ptr_regno); + struct bpf_func_state *ptr_state = func(env, reg); + int err; + int min_off, max_off; + + /* Note that we pass a NULL meta, so raw access will not be permitted. */ - if (!tnum_is_const(reg->var_off)) { + err = check_stack_range_initialized(env, ptr_regno, off, size, + false, ACCESS_DIRECT, NULL); + if (err) + return err; + + min_off = reg->smin_value + off; + max_off = reg->smax_value + off; + mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno); + return 0; +} + +/* check_stack_read dispatches to check_stack_read_fixed_off or + * check_stack_read_var_off. + * + * The caller must ensure that the offset falls within the allocated stack + * bounds. + * + * 'dst_regno' is a register which will receive the value from the stack. It + * can be -1, meaning that the read value is not going to a register. + */ +static int check_stack_read(struct bpf_verifier_env *env, + int ptr_regno, int off, int size, + int dst_regno) +{ + struct bpf_reg_state *reg = reg_state(env, ptr_regno); + struct bpf_func_state *state = func(env, reg); + int err; + /* Some accesses are only permitted with a static offset. */ + bool var_off = !tnum_is_const(reg->var_off); + + /* The offset is required to be static when reads don't go to a + * register, in order to not leak pointers (see + * check_stack_read_fixed_off). + */ + if (dst_regno < 0 && var_off) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable stack access var_off=%s off=%d size=%d\n", + verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n", tn_buf, off, size); return -EACCES; } + /* Variable offset is prohibited for unprivileged mode for simplicity + * since it requires corresponding support in Spectre masking for stack + * ALU. See also retrieve_ptr_limit(). + */ + if (!env->bypass_spec_v1 && var_off) { + char tn_buf[48]; - if (off >= 0 || off < -MAX_BPF_STACK) { - verbose(env, "invalid stack off=%d size=%d\n", off, size); + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", + ptr_regno, tn_buf); return -EACCES; } - return 0; + if (!var_off) { + off += reg->var_off.value; + err = check_stack_read_fixed_off(env, state, off, size, + dst_regno); + } else { + /* Variable offset stack reads need more conservative handling + * than fixed offset ones. Note that dst_regno >= 0 on this + * branch. + */ + err = check_stack_read_var_off(env, ptr_regno, off, size, + dst_regno); + } + return err; +} + + +/* check_stack_write dispatches to check_stack_write_fixed_off or + * check_stack_write_var_off. + * + * 'ptr_regno' is the register used as a pointer into the stack. + * 'off' includes 'ptr_regno->off', but not its variable offset (if any). + * 'value_regno' is the register whose value we're writing to the stack. It can + * be -1, meaning that we're not writing from a register. + * + * The caller must ensure that the offset falls within the maximum stack size. + */ +static int check_stack_write(struct bpf_verifier_env *env, + int ptr_regno, int off, int size, + int value_regno, int insn_idx) +{ + struct bpf_reg_state *reg = reg_state(env, ptr_regno); + struct bpf_func_state *state = func(env, reg); + int err; + + if (tnum_is_const(reg->var_off)) { + off += reg->var_off.value; + err = check_stack_write_fixed_off(env, state, off, size, + value_regno, insn_idx); + } else { + /* Variable offset stack reads need more conservative handling + * than fixed offset ones. + */ + err = check_stack_write_var_off(env, state, + ptr_regno, off, size, + value_regno, insn_idx); + } + return err; } static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, @@ -2851,11 +3111,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } -static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) -{ - return cur_regs(env) + regno; -} - static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno)); @@ -2974,8 +3229,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, break; case PTR_TO_STACK: pointer_desc = "stack "; - /* The stack spill tracking logic in check_stack_write() - * and check_stack_read() relies on stack accesses being + /* The stack spill tracking logic in check_stack_write_fixed_off() + * and check_stack_read_fixed_off() relies on stack accesses being * aligned. */ strict = true; @@ -3393,6 +3648,91 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, return 0; } +/* Check that the stack access at the given offset is within bounds. The + * maximum valid offset is -1. + * + * The minimum valid offset is -MAX_BPF_STACK for writes, and + * -state->allocated_stack for reads. + */ +static int check_stack_slot_within_bounds(int off, + struct bpf_func_state *state, + enum bpf_access_type t) +{ + int min_valid_off; + + if (t == BPF_WRITE) + min_valid_off = -MAX_BPF_STACK; + else + min_valid_off = -state->allocated_stack; + + if (off < min_valid_off || off > -1) + return -EACCES; + return 0; +} + +/* Check that the stack access at 'regno + off' falls within the maximum stack + * bounds. + * + * 'off' includes `regno->offset`, but not its dynamic part (if any). + */ +static int check_stack_access_within_bounds( + struct bpf_verifier_env *env, + int regno, int off, int access_size, + enum stack_access_src src, enum bpf_access_type type) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = regs + regno; + struct bpf_func_state *state = func(env, reg); + int min_off, max_off; + int err; + char *err_extra; + + if (src == ACCESS_HELPER) + /* We don't know if helpers are reading or writing (or both). */ + err_extra = " indirect access to"; + else if (type == BPF_READ) + err_extra = " read from"; + else + err_extra = " write to"; + + if (tnum_is_const(reg->var_off)) { + min_off = reg->var_off.value + off; + if (access_size > 0) + max_off = min_off + access_size - 1; + else + max_off = min_off; + } else { + if (reg->smax_value >= BPF_MAX_VAR_OFF || + reg->smin_value <= -BPF_MAX_VAR_OFF) { + verbose(env, "invalid unbounded variable-offset%s stack R%d\n", + err_extra, regno); + return -EACCES; + } + min_off = reg->smin_value + off; + if (access_size > 0) + max_off = reg->smax_value + off + access_size - 1; + else + max_off = min_off; + } + + err = check_stack_slot_within_bounds(min_off, state, type); + if (!err) + err = check_stack_slot_within_bounds(max_off, state, type); + + if (err) { + if (tnum_is_const(reg->var_off)) { + verbose(env, "invalid%s stack R%d off=%d size=%d\n", + err_extra, regno, off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", + err_extra, regno, tn_buf, access_size); + } + } + return err; +} /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory @@ -3505,8 +3845,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (reg->type == PTR_TO_STACK) { - off += reg->var_off.value; - err = check_stack_access(env, reg, off, size); + /* Basic bounds checks. */ + err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t); if (err) return err; @@ -3515,12 +3855,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err; - if (t == BPF_WRITE) - err = check_stack_write(env, state, off, size, - value_regno, insn_idx); - else - err = check_stack_read(env, state, off, size, + if (t == BPF_READ) + err = check_stack_read(env, regno, off, size, value_regno); + else + err = check_stack_write(env, regno, off, size, + value_regno, insn_idx); } else if (reg_is_pkt_pointer(reg)) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { verbose(env, "cannot write into packet\n"); @@ -3642,49 +3982,53 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins BPF_SIZE(insn->code), BPF_WRITE, -1, true); } -static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, - int off, int access_size, - bool zero_size_allowed) -{ - struct bpf_reg_state *reg = reg_state(env, regno); - - if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || - access_size < 0 || (access_size == 0 && !zero_size_allowed)) { - if (tnum_is_const(reg->var_off)) { - verbose(env, "invalid stack type R%d off=%d access_size=%d\n", - regno, off, access_size); - } else { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", - regno, tn_buf, access_size); - } - return -EACCES; - } - return 0; -} - -/* when register 'regno' is passed into function that will read 'access_size' - * bytes from that pointer, make sure that it's within stack boundary - * and all elements of stack are initialized. - * Unlike most pointer bounds-checking functions, this one doesn't take an - * 'off' argument, so it has to add in reg->off itself. +/* When register 'regno' is used to read the stack (either directly or through + * a helper function) make sure that it's within stack boundary and, depending + * on the access type, that all elements of the stack are initialized. + * + * 'off' includes 'regno->off', but not its dynamic part (if any). + * + * All registers that have been spilled on the stack in the slots within the + * read offsets are marked as read. */ -static int check_stack_boundary(struct bpf_verifier_env *env, int regno, - int access_size, bool zero_size_allowed, - struct bpf_call_arg_meta *meta) +static int check_stack_range_initialized( + struct bpf_verifier_env *env, int regno, int off, + int access_size, bool zero_size_allowed, + enum stack_access_src type, struct bpf_call_arg_meta *meta) { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); int err, min_off, max_off, i, j, slot, spi; + char *err_extra = type == ACCESS_HELPER ? " indirect" : ""; + enum bpf_access_type bounds_check_type; + /* Some accesses can write anything into the stack, others are + * read-only. + */ + bool clobber = false; + + if (access_size == 0 && !zero_size_allowed) { + verbose(env, "invalid zero-sized read\n"); + return -EACCES; + } + + if (type == ACCESS_HELPER) { + /* The bounds checks for writes are more permissive than for + * reads. However, if raw_mode is not set, we'll do extra + * checks below. + */ + bounds_check_type = BPF_WRITE; + clobber = true; + } else { + bounds_check_type = BPF_READ; + } + err = check_stack_access_within_bounds(env, regno, off, access_size, + type, bounds_check_type); + if (err) + return err; + if (tnum_is_const(reg->var_off)) { - min_off = max_off = reg->var_off.value + reg->off; - err = __check_stack_boundary(env, regno, min_off, access_size, - zero_size_allowed); - if (err) - return err; + min_off = max_off = reg->var_off.value + off; } else { /* Variable offset is prohibited for unprivileged mode for * simplicity since it requires corresponding support in @@ -3695,8 +4039,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", - regno, tn_buf); + verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", + regno, err_extra, tn_buf); return -EACCES; } /* Only initialized buffer on stack is allowed to be accessed @@ -3708,28 +4052,8 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (meta && meta->raw_mode) meta = NULL; - if (reg->smax_value >= BPF_MAX_VAR_OFF || - reg->smax_value <= -BPF_MAX_VAR_OFF) { - verbose(env, "R%d unbounded indirect variable offset stack access\n", - regno); - return -EACCES; - } - min_off = reg->smin_value + reg->off; - max_off = reg->smax_value + reg->off; - err = __check_stack_boundary(env, regno, min_off, access_size, - zero_size_allowed); - if (err) { - verbose(env, "R%d min value is outside of stack bound\n", - regno); - return err; - } - err = __check_stack_boundary(env, regno, max_off, access_size, - zero_size_allowed); - if (err) { - verbose(env, "R%d max value is outside of stack bound\n", - regno); - return err; - } + min_off = reg->smin_value + off; + max_off = reg->smax_value + off; } if (meta && meta->raw_mode) { @@ -3749,8 +4073,10 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (*stype == STACK_MISC) goto mark; if (*stype == STACK_ZERO) { - /* helper can write anything into the stack */ - *stype = STACK_MISC; + if (clobber) { + /* helper can write anything into the stack */ + *stype = STACK_MISC; + } goto mark; } @@ -3761,22 +4087,24 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, if (state->stack[spi].slot_type[0] == STACK_SPILL && (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) { - __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); - for (j = 0; j < BPF_REG_SIZE; j++) - state->stack[spi].slot_type[j] = STACK_MISC; + if (clobber) { + __mark_reg_unknown(env, &state->stack[spi].spilled_ptr); + for (j = 0; j < BPF_REG_SIZE; j++) + state->stack[spi].slot_type[j] = STACK_MISC; + } goto mark; } err: if (tnum_is_const(reg->var_off)) { - verbose(env, "invalid indirect read from stack off %d+%d size %d\n", - min_off, i - min_off, access_size); + verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", + err_extra, regno, min_off, i - min_off, access_size); } else { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", - tn_buf, i - min_off, access_size); + verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", + err_extra, regno, tn_buf, i - min_off, access_size); } return -EACCES; mark: @@ -3825,8 +4153,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, "rdwr", &env->prog->aux->max_rdwr_access); case PTR_TO_STACK: - return check_stack_boundary(env, regno, access_size, - zero_size_allowed, meta); + return check_stack_range_initialized( + env, + regno, reg->off, access_size, + zero_size_allowed, ACCESS_HELPER, meta); default: /* scalar_value or invalid ptr */ /* Allow zero-byte read from NULL, regardless of pointer type */ if (zero_size_allowed && access_size == 0 && @@ -5519,6 +5849,41 @@ static int sanitize_err(struct bpf_verifier_env *env, return -EACCES; } +/* check that stack access falls within stack limits and that 'reg' doesn't + * have a variable offset. + * + * Variable offset is prohibited for unprivileged mode for simplicity since it + * requires corresponding support in Spectre masking for stack ALU. See also + * retrieve_ptr_limit(). + * + * + * 'off' includes 'reg->off'. + */ +static int check_stack_access_for_ptr_arithmetic( + struct bpf_verifier_env *env, + int regno, + const struct bpf_reg_state *reg, + int off) +{ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", + regno, tn_buf, off); + return -EACCES; + } + + if (off >= 0 || off < -MAX_BPF_STACK) { + verbose(env, "R%d stack pointer arithmetic goes out of range, " + "prohibited for !root; off=%d\n", regno, off); + return -EACCES; + } + + return 0; +} + + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. * If we return -EACCES, caller may want to try again treating pointer as a @@ -5753,10 +6118,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, "prohibited for !root\n", dst); return -EACCES; } else if (dst_reg->type == PTR_TO_STACK && - check_stack_access(env, dst_reg, dst_reg->off + - dst_reg->var_off.value, 1)) { - verbose(env, "R%d stack pointer arithmetic goes out of range, " - "prohibited for !root\n", dst); + check_stack_access_for_ptr_arithmetic( + env, dst, dst_reg, dst_reg->off + + dst_reg->var_off.value)) { return -EACCES; } } @@ -11952,6 +12316,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->strict_alignment = false; env->allow_ptr_leaks = bpf_allow_ptr_leaks(); + env->allow_uninit_stack = bpf_allow_uninit_stack(); env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access(); env->bypass_spec_v1 = bpf_bypass_spec_v1(); env->bypass_spec_v4 = bpf_bypass_spec_v4(); From 2982ea926b5cb97ff79fbb27eba72521568811ff Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 23 Mar 2021 15:05:48 +0100 Subject: [PATCH 11/39] bpf: Refactor and streamline bounds check into helper [ Upstream commit 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 ] Move the bounds check in adjust_ptr_min_max_vals() into a small helper named sanitize_check_bounds() in order to simplify the former a bit. Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 49 +++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 94923c2bdd81..1b97fd364ce2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5883,6 +5883,37 @@ static int check_stack_access_for_ptr_arithmetic( return 0; } +static int sanitize_check_bounds(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + const struct bpf_reg_state *dst_reg) +{ + u32 dst = insn->dst_reg; + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (env->bypass_spec_v1) + return 0; + + switch (dst_reg->type) { + case PTR_TO_STACK: + if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, + dst_reg->off + dst_reg->var_off.value)) + return -EACCES; + break; + case PTR_TO_MAP_VALUE: + if (check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } + break; + default: + break; + } + + return 0; +} /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. @@ -6108,22 +6139,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); - /* For unprivileged we require that resulting offset must be in bounds - * in order to be able to sanitize access later on. - */ - if (!env->bypass_spec_v1) { - if (dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, " - "prohibited for !root\n", dst); - return -EACCES; - } else if (dst_reg->type == PTR_TO_STACK && - check_stack_access_for_ptr_arithmetic( - env, dst, dst_reg, dst_reg->off + - dst_reg->var_off.value)) { - return -EACCES; - } - } + if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; return 0; } From b642e493a9a0ed56be3b8cfcfb95fe3c7cea0b55 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Mar 2021 10:38:26 +0100 Subject: [PATCH 12/39] bpf: Tighten speculative pointer arithmetic mask [ Upstream commit 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 ] This work tightens the offset mask we use for unprivileged pointer arithmetic in order to mitigate a corner case reported by Piotr and Benedict where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of-bounds in order to leak kernel memory via side-channel to user space. Before this change, the computed ptr_limit for retrieve_ptr_limit() helper represents largest valid distance when moving pointer to the right or left which is then fed as aux->alu_limit to generate masking instructions against the offset register. After the change, the derived aux->alu_limit represents the largest potential value of the offset register which we mask against which is just a narrower subset of the former limit. For minimal complexity, we call sanitize_ptr_alu() from 2 observation points in adjust_ptr_min_max_vals(), that is, before and after the simulated alu operation. In the first step, we retieve the alu_state and alu_limit before the operation as well as we branch-off a verifier path and push it to the verification stack as we did before which checks the dst_reg under truncation, in other words, when the speculative domain would attempt to move the pointer out-of-bounds. In the second step, we retrieve the new alu_limit and calculate the absolute distance between both. Moreover, we commit the alu_state and final alu_limit via update_alu_sanitation_state() to the env's instruction aux data, and bail out from there if there is a mismatch due to coming from different verification paths with different states. Reported-by: Piotr Krysiuk Reported-by: Benedict Schlueter Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov Tested-by: Benedict Schlueter Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 73 ++++++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1b97fd364ce2..b9180509917e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5674,7 +5674,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max = 0, ptr_limit = 0; + u32 max = 0, ptr_limit = 0; if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -5683,26 +5683,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the - * left direction, see BPF_REG_FP. + * left direction, see BPF_REG_FP. Also, unknown scalar + * offset where we would need to deal with min/max bounds is + * currently prohibited for unprivileged. */ max = MAX_BPF_STACK + mask_to_left; - /* Indirect variable offset stack access is prohibited in - * unprivileged mode so it's not handled here. - */ - off = ptr_reg->off + ptr_reg->var_off.value; - if (mask_to_left) - ptr_limit = MAX_BPF_STACK + off; - else - ptr_limit = -off - 1; + ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; - if (mask_to_left) { - ptr_limit = ptr_reg->umax_value + ptr_reg->off; - } else { - off = ptr_reg->smin_value + ptr_reg->off; - ptr_limit = ptr_reg->map_ptr->value_size - off - 1; - } + ptr_limit = (mask_to_left ? + ptr_reg->smin_value : + ptr_reg->umax_value) + ptr_reg->off; break; default: return REASON_TYPE; @@ -5757,10 +5749,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + struct bpf_insn_aux_data *tmp_aux, + const bool commit_window) { + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_insn_aux_data *aux = cur_aux(env); bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); @@ -5779,18 +5773,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim; - alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; - alu_state |= ptr_is_dst_reg ? - BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err; + if (commit_window) { + /* In commit phase we narrow the masking window based on + * the observed pointer move after the simulated operation. + */ + alu_state = tmp_aux->alu_state; + alu_limit = abs(tmp_aux->alu_limit - alu_limit); + } else { + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + } + err = update_alu_sanitation_state(aux, alu_state, alu_limit); if (err < 0) return err; do_sim: + /* If we're in commit phase, we're done here given we already + * pushed the truncated dst_reg into the speculative verification + * stack. + */ + if (commit_window) + return 0; + /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of * masking when off was not within expected range. If off @@ -5933,6 +5942,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + struct bpf_insn_aux_data tmp_aux = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -5999,12 +6009,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* pointer types do not carry 32-bit bounds at the moment. */ __mark_reg32_unbounded(dst_reg); - switch (opcode) { - case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, + &tmp_aux, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); + } + switch (opcode) { + case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -6055,10 +6068,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) - return sanitize_err(env, insn, ret, off_reg, dst_reg); - if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -6141,6 +6150,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, + &tmp_aux, true); + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + } return 0; } From 82fa9ced35d88581cffa4a1c856fc41fca96d80a Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Thu, 15 Apr 2021 17:27:11 +0000 Subject: [PATCH 13/39] locking/qrwlock: Fix ordering in queued_write_lock_slowpath() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 84a24bf8c52e66b7ac89ada5e3cfbe72d65c1896 ] While this code is executed with the wait_lock held, a reader can acquire the lock without holding wait_lock. The writer side loops checking the value with the atomic_cond_read_acquire(), but only truly acquires the lock when the compare-and-exchange is completed successfully which isn’t ordered. This exposes the window between the acquire and the cmpxchg to an A-B-A problem which allows reads following the lock acquisition to observe values speculatively before the write lock is truly acquired. We've seen a problem in epoll where the reader does a xchg while holding the read lock, but the writer can see a value change out from under it. Writer | Reader -------------------------------------------------------------------------------- ep_scan_ready_list() | |- write_lock_irq() | |- queued_write_lock_slowpath() | |- atomic_cond_read_acquire() | | read_lock_irqsave(&ep->lock, flags); --> (observes value before unlock) | chain_epi_lockless() | | epi->next = xchg(&ep->ovflist, epi); | | read_unlock_irqrestore(&ep->lock, flags); | | | atomic_cmpxchg_relaxed() | |-- READ_ONCE(ep->ovflist); | A core can order the read of the ovflist ahead of the atomic_cmpxchg_relaxed(). Switching the cmpxchg to use acquire semantics addresses this issue at which point the atomic_cond_read can be switched to use relaxed semantics. Fixes: b519b56e378ee ("locking/qrwlock: Use atomic_cond_read_acquire() when spinning in qrwlock") Signed-off-by: Ali Saidi [peterz: use try_cmpxchg()] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steve Capper Acked-by: Will Deacon Acked-by: Waiman Long Tested-by: Steve Capper Signed-off-by: Sasha Levin --- kernel/locking/qrwlock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index fe9ca92faa2a..909b0bf22a1e 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -61,6 +61,8 @@ EXPORT_SYMBOL(queued_read_lock_slowpath); */ void queued_write_lock_slowpath(struct qrwlock *lock) { + int cnts; + /* Put the writer into the wait queue */ arch_spin_lock(&lock->wait_lock); @@ -74,9 +76,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock) /* When no more readers or writers, set the locked flag */ do { - atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING); - } while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING, - _QW_LOCKED) != _QW_WAITING); + cnts = atomic_cond_read_relaxed(&lock->cnts, VAL == _QW_WAITING); + } while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)); unlock: arch_spin_unlock(&lock->wait_lock); } From 6f8315e5d9511ed1cf28ee2afbc9f89ff693de7b Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 15 Apr 2021 14:22:43 -0700 Subject: [PATCH 14/39] perf/x86/intel/uncore: Remove uncore extra PCI dev HSWEP_PCI_PCU_3 [ Upstream commit 9d480158ee86ad606d3a8baaf81e6b71acbfd7d5 ] There may be a kernel panic on the Haswell server and the Broadwell server, if the snbep_pci2phy_map_init() return error. The uncore_extra_pci_dev[HSWEP_PCI_PCU_3] is used in the cpu_init() to detect the existence of the SBOX, which is a MSR type of PMON unit. The uncore_extra_pci_dev is allocated in the uncore_pci_init(). If the snbep_pci2phy_map_init() returns error, perf doesn't initialize the PCI type of the PMON units, so the uncore_extra_pci_dev will not be allocated. But perf may continue initializing the MSR type of PMON units. A null dereference kernel panic will be triggered. The sockets in a Haswell server or a Broadwell server are identical. Only need to detect the existence of the SBOX once. Current perf probes all available PCU devices and stores them into the uncore_extra_pci_dev. It's unnecessary. Use the pci_get_device() to replace the uncore_extra_pci_dev. Only detect the existence of the SBOX on the first available PCU device once. Factor out hswep_has_limit_sbox(), since the Haswell server and the Broadwell server uses the same way to detect the existence of the SBOX. Add some macros to replace the magic number. Fixes: 5306c31c5733 ("perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes") Reported-by: Steve Wahl Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Tested-by: Steve Wahl Link: https://lkml.kernel.org/r/1618521764-100923-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snbep.c | 59 ++++++++++++---------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 7bdb1821215d..3112186a4f4b 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1159,7 +1159,6 @@ enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, BDX_PCI_QPI_PORT2_FILTER, - HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -2816,22 +2815,33 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { NULL, }; +#define HSWEP_PCU_DID 0x2fc0 +#define HSWEP_PCU_CAPID4_OFFET 0x94 +#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3) + +static bool hswep_has_limit_sbox(unsigned int device) +{ + struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL); + u32 capid4; + + if (!dev) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + if (!hswep_get_chop(capid4)) + return true; + + return false; +} + void hswep_uncore_cpu_init(void) { - int pkg = boot_cpu_data.logical_proc_id; - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - u32 capid4; - - pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], - 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - hswep_uncore_sbox.num_boxes = 2; - } + if (hswep_has_limit_sbox(HSWEP_PCU_DID)) + hswep_uncore_sbox.num_boxes = 2; uncore_msr_uncores = hswep_msr_uncores; } @@ -3094,11 +3104,6 @@ static const struct pci_device_id hswep_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3190,27 +3195,18 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { EVENT_CONSTRAINT_END }; +#define BDX_PCU_DID 0x6fc0 + void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) { - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; /* Detect systems with no SBOXes */ - } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - struct pci_dev *pdev; - u32 capid4; + if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; - pci_read_config_dword(pdev, 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - } hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3431,11 +3427,6 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, BDX_PCI_QPI_PORT2_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; From ab112cc573ccde3cff7e9159d5fe21c793242b55 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 21 Apr 2021 17:18:34 -0700 Subject: [PATCH 15/39] perf/x86/kvm: Fix Broadwell Xeon stepping in isolation_ucodes[] [ Upstream commit 4b2f1e59229b9da319d358828cdfa4ddbc140769 ] The only stepping of Broadwell Xeon parts is stepping 1. Fix the relevant isolation_ucodes[] entry, which previously enumerated stepping 2. Although the original commit was characterized as an optimization, it is also a workaround for a correctness issue. If a PMI arrives between kvm's call to perf_guest_get_msrs() and the subsequent VM-entry, a stale value for the IA32_PEBS_ENABLE MSR may be restored at the next VM-exit. This is because, unbeknownst to kvm, PMI throttling may clear bits in the IA32_PEBS_ENABLE MSR. CPUs with "PEBS isolation" don't suffer from this issue, because perf_guest_get_msrs() doesn't report the IA32_PEBS_ENABLE value. Fixes: 9b545c04abd4f ("perf/x86/kvm: Avoid unnecessary work in guest filtering") Signed-off-by: Jim Mattson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Peter Shier Acked-by: Andi Kleen Link: https://lkml.kernel.org/r/20210422001834.1748319-1-jmattson@google.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e7dc13fe5e29..0b9975200ae3 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4387,7 +4387,7 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), From 4d0cfb3713bc3263cd4b6d43d5fcb96c7fdaead3 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 20 Apr 2021 23:15:53 +0800 Subject: [PATCH 16/39] perf auxtrace: Fix potential NULL pointer dereference [ Upstream commit b14585d9f18dc617e975815570fe836be656b1da ] In the function auxtrace_parse_snapshot_options(), the callback pointer "itr->parse_snapshot_options" can be NULL if it has not been set during the AUX record initialization. This can cause tool crashing if the callback pointer "itr->parse_snapshot_options" is dereferenced without performing NULL check. Add a NULL check for the pointer "itr->parse_snapshot_options" before invoke the callback. Fixes: d20031bb63dd6dde ("perf tools: Add AUX area tracing Snapshot Mode") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tiezhu Yang Link: http://lore.kernel.org/lkml/20210420151554.2031768-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index d8ada6a3c555..d3c15b53495d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -636,7 +636,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, break; } - if (itr) + if (itr && itr->parse_snapshot_options) return itr->parse_snapshot_options(itr, opts, str); pr_err("No AUX area tracing to snapshot\n"); From ffe249b4fc2ce48a6e32485bd593a28e00448128 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 15 Apr 2021 17:27:44 +0800 Subject: [PATCH 17/39] perf map: Fix error return code in maps__clone() [ Upstream commit c6f87141254d16e281e4b4431af7316895207b8f ] Although 'err' has been initialized to -ENOMEM, but it will be reassigned by the "err = unwind__prepare_access(...)" statement in the for loop. So that, the value of 'err' is unknown when map__clone() failed. Fixes: 6c502584438bda63 ("perf unwind: Call unwind__prepare_access for forked thread") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: zhen lei Link: http://lore.kernel.org/lkml/20210415092744.3793-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/map.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e2537d5acab0..f4d44f75ba15 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -836,15 +836,18 @@ int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) int maps__clone(struct thread *thread, struct maps *parent) { struct maps *maps = thread->maps; - int err = -ENOMEM; + int err; struct map *map; down_read(&parent->lock); maps__for_each_entry(parent, map) { struct map *new = map__clone(map); - if (new == NULL) + + if (new == NULL) { + err = -ENOMEM; goto out_unlock; + } err = unwind__prepare_access(maps, new, NULL); if (err) From 079e32723f78ef814f3973b4598b581275463836 Mon Sep 17 00:00:00 2001 From: Shou-Chieh Hsu Date: Tue, 2 Mar 2021 11:58:01 +0800 Subject: [PATCH 18/39] HID: google: add don USB id [ Upstream commit 36b87cf302a4f13f8b4344bcf98f67405a145e2f ] Add 1 additional hammer-like device. Signed-off-by: Shou-Chieh Hsu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 85a054f1ce38..2a176f77b32e 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -526,6 +526,8 @@ static void hammer_remove(struct hid_device *hdev) } static const struct hid_device_id hammer_devices[] = { + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 06813f297dcc..b93ce0d475e0 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -486,6 +486,7 @@ #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 +#define USB_DEVICE_ID_GOOGLE_DON 0x5050 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From f691dc86411d80005dcf26fedd5c95c834a9da09 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Thu, 4 Mar 2021 05:19:57 -0800 Subject: [PATCH 19/39] HID: alps: fix error return code in alps_input_configured() [ Upstream commit fa8ba6e5dc0e78e409e503ddcfceef5dd96527f4 ] When input_register_device() fails, no error return code is assigned. To fix this bug, ret is assigned with -ENOENT as error return code. Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-alps.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index 3feaece13ade..6b665931147d 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -761,6 +761,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) if (input_register_device(data->input2)) { input_free_device(input2); + ret = -ENOENT; goto exit; } } From e913cbc952c300cd99ce40538b1c53438f9f7ffd Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Sat, 9 Jan 2021 17:36:58 -0500 Subject: [PATCH 20/39] HID cp2112: fix support for multiple gpiochips [ Upstream commit 2a2b09c867fdac63f430a45051e7bd0c46edc381 ] In lk 5.11.0-rc2 connecting a USB based Silicon Labs HID to I2C bridge evaluation board (CP2112EK) causes this warning: gpio gpiochip0: (cp2112_gpio): detected irqchip that is shared with multiple gpiochips: please fix the driver Simply copy what other gpio related drivers do to fix this particular warning: replicate the struct irq_chip object in each device instance rather than have a static object which makes that object (incorrectly) shared by each device. Signed-off-by: Douglas Gilbert Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-cp2112.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index 21e15627a461..477baa30889c 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -161,6 +161,7 @@ struct cp2112_device { atomic_t read_avail; atomic_t xfer_avail; struct gpio_chip gc; + struct irq_chip irq; u8 *in_out_buffer; struct mutex lock; @@ -1175,16 +1176,6 @@ static int cp2112_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip cp2112_gpio_irqchip = { - .name = "cp2112-gpio", - .irq_startup = cp2112_gpio_irq_startup, - .irq_shutdown = cp2112_gpio_irq_shutdown, - .irq_ack = cp2112_gpio_irq_ack, - .irq_mask = cp2112_gpio_irq_mask, - .irq_unmask = cp2112_gpio_irq_unmask, - .irq_set_type = cp2112_gpio_irq_type, -}; - static int __maybe_unused cp2112_allocate_irq(struct cp2112_device *dev, int pin) { @@ -1339,8 +1330,17 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id) dev->gc.can_sleep = 1; dev->gc.parent = &hdev->dev; + dev->irq.name = "cp2112-gpio"; + dev->irq.irq_startup = cp2112_gpio_irq_startup; + dev->irq.irq_shutdown = cp2112_gpio_irq_shutdown; + dev->irq.irq_ack = cp2112_gpio_irq_ack; + dev->irq.irq_mask = cp2112_gpio_irq_mask; + dev->irq.irq_unmask = cp2112_gpio_irq_unmask; + dev->irq.irq_set_type = cp2112_gpio_irq_type; + dev->irq.flags = IRQCHIP_MASK_ON_SUSPEND; + girq = &dev->gc.irq; - girq->chip = &cp2112_gpio_irqchip; + girq->chip = &dev->irq; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; From 8c4bfe30eb555bf72e2b675597a8c9304e85d376 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Wed, 20 Jan 2021 15:34:30 +0800 Subject: [PATCH 21/39] HID: wacom: Assign boolean values to a bool variable [ Upstream commit e29c62ffb008829dc8bcc0a2ec438adc25a8255e ] Fix the following coccicheck warnings: ./drivers/hid/wacom_wac.c:2536:2-6: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/wacom_wac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 6cda5935fc09..2d70dc4bea65 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2533,7 +2533,7 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, !wacom_wac->shared->is_touch_on) { if (!wacom_wac->shared->touch_down) return; - prox = 0; + prox = false; } wacom_wac->hid_data.num_received++; From eb2c81ee764db18e525e0f701540afef4025a571 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 14 Jan 2021 19:29:28 +0800 Subject: [PATCH 22/39] soc: qcom: geni: shield geni_icc_get() for ACPI boot [ Upstream commit 0c9fdcdba68208270ae85d39600ea97da1718344 ] Currently, GENI devices like i2c-qcom-geni fails to probe in ACPI boot, if interconnect support is enabled. That's because interconnect driver only supports DT right now. As interconnect is not necessarily required for basic function of GENI devices, let's shield geni_icc_get() call, and then all other ICC calls become nop due to NULL icc_path, so that GENI devices keep working for ACPI boot. Reviewed-by: Bjorn Andersson Signed-off-by: Shawn Guo Link: https://lore.kernel.org/r/20210114112928.11368-1-shawn.guo@linaro.org Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/soc/qcom/qcom-geni-se.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index be76fddbf524..0dbca679bd32 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -741,6 +741,9 @@ int geni_icc_get(struct geni_se *se, const char *icc_ddr) int i, err; const char *icc_names[] = {"qup-core", "qup-config", icc_ddr}; + if (has_acpi_companion(se->dev)) + return 0; + for (i = 0; i < ARRAY_SIZE(se->icc_paths); i++) { if (!icc_names[i]) continue; From e8d9a93ec46e52188a95bc87924681c379e359cd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:28 +0200 Subject: [PATCH 23/39] dmaengine: xilinx: dpdma: Fix descriptor issuing on video group [ Upstream commit 1cbd44666216278bbb6a55bcb6b9283702171c77 ] When multiple channels are part of a video group, the transfer is triggered only when all channels in the group are ready. The logic to do so is incorrect, as it causes the descriptors for all channels but the last one in a group to not being pushed to the hardware. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-2-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dpdma.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 55df63dead8d..d504112c609e 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -839,6 +839,7 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) struct xilinx_dpdma_tx_desc *desc; struct virt_dma_desc *vdesc; u32 reg, channels; + bool first_frame; lockdep_assert_held(&chan->lock); @@ -852,14 +853,6 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) chan->running = true; } - if (chan->video_group) - channels = xilinx_dpdma_chan_video_group_ready(chan); - else - channels = BIT(chan->id); - - if (!channels) - return; - vdesc = vchan_next_desc(&chan->vchan); if (!vdesc) return; @@ -884,13 +877,26 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK, upper_32_bits(sw_desc->dma_addr))); - if (chan->first_frame) + first_frame = chan->first_frame; + chan->first_frame = false; + + if (chan->video_group) { + channels = xilinx_dpdma_chan_video_group_ready(chan); + /* + * Trigger the transfer only when all channels in the group are + * ready. + */ + if (!channels) + return; + } else { + channels = BIT(chan->id); + } + + if (first_frame) reg = XILINX_DPDMA_GBL_TRIG_MASK(channels); else reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels); - chan->first_frame = false; - dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg); } From db010ba54a96128f65b388c46875e7b991982ba4 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 7 Mar 2021 06:06:29 +0200 Subject: [PATCH 24/39] dmaengine: xilinx: dpdma: Fix race condition in done IRQ [ Upstream commit 868833fbffbe51c487df4f95d4de9194264a4b30 ] The active descriptor pointer is accessed from different contexts, including different interrupt handlers, and its access must be protected by the channel's lock. This wasn't done in the done IRQ handler. Fix it. Signed-off-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210307040629.29308-3-laurent.pinchart@ideasonboard.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/xilinx/xilinx_dpdma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index d504112c609e..70b29bd079c9 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -1048,13 +1048,14 @@ static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan) */ static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan) { - struct xilinx_dpdma_tx_desc *active = chan->desc.active; + struct xilinx_dpdma_tx_desc *active; unsigned long flags; spin_lock_irqsave(&chan->lock, flags); xilinx_dpdma_debugfs_desc_done_irq(chan); + active = chan->desc.active; if (active) vchan_cyclic_callback(&active->vdesc); else From 6ce64437224df9f28bb4bc17a4b5363560dcc79a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 24 Mar 2021 15:10:32 +0200 Subject: [PATCH 25/39] ARM: dts: Fix swapped mmc order for omap3 [ Upstream commit a1ebdb3741993f853865d1bd8f77881916ad53a7 ] Also some omap3 devices like n900 seem to have eMMC and micro-sd swapped around with commit 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4"). Let's fix the issue with aliases as discussed on the mailing lists. While the mmc aliases should be board specific, let's first fix the issue with minimal changes. Cc: Aaro Koskinen Cc: Peter Ujfalusi Signed-off-by: Tony Lindgren Signed-off-by: Sasha Levin --- arch/arm/boot/dts/omap3.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 9dcae1f2bc99..c5b9da0d7e6c 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -24,6 +24,9 @@ aliases { i2c0 = &i2c1; i2c1 = &i2c2; i2c2 = &i2c3; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; From 66d0cf7dcaa1093b7bc3f6e8995240b8be8b287d Mon Sep 17 00:00:00 2001 From: Phillip Potter Date: Sun, 11 Apr 2021 12:28:24 +0100 Subject: [PATCH 26/39] net: geneve: check skb is large enough for IPv4/IPv6 header [ Upstream commit 6628ddfec7580882f11fdc5c194a8ea781fdadfa ] Check within geneve_xmit_skb/geneve6_xmit_skb that sk_buff structure is large enough to include IPv4 or IPv6 header, and reject if not. The geneve_xmit_skb portion and overall idea was contributed by Eric Dumazet. Fixes a KMSAN-found uninit-value bug reported by syzbot at: https://syzkaller.appspot.com/bug?id=abe95dc3e3e9667fc23b8d81f29ecad95c6f106f Suggested-by: Eric Dumazet Reported-by: syzbot+2e406a9ac75bb71d4b7a@syzkaller.appspotmail.com Signed-off-by: Phillip Potter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index abd37f26af68..11864ac101b8 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -890,6 +890,9 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, geneve->cfg.info.key.tp_dst, sport); @@ -984,6 +987,9 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) + return -EINVAL; + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, geneve->cfg.info.key.tp_dst, sport); From d33031a894d2f6476e54cccfbfa9f7971e5522af Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 9 Apr 2021 16:28:05 +0800 Subject: [PATCH 27/39] dmaengine: tegra20: Fix runtime PM imbalance on error [ Upstream commit 917a3200b9f467a154999c7572af345f2470aaf4 ] pm_runtime_get_sync() will increase the runtime PM counter even it returns an error. Thus a pairing decrement is needed to prevent refcount leak. Fix this by replacing this API with pm_runtime_resume_and_get(), which will not change the runtime PM counter on error. Signed-off-by: Dinghao Liu Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210409082805.23643-1-dinghao.liu@zju.edu.cn Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/tegra20-apb-dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 71827d9b0aa1..b7260749e8ee 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -723,7 +723,7 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) goto end; } if (!tdc->busy) { - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to enable DMA\n"); goto end; @@ -818,7 +818,7 @@ static void tegra_dma_synchronize(struct dma_chan *dc) struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); int err; - err = pm_runtime_get_sync(tdc->tdma->dev); + err = pm_runtime_resume_and_get(tdc->tdma->dev); if (err < 0) { dev_err(tdc2dev(tdc), "Failed to synchronize DMA: %d\n", err); return; From da99331fc6ce2d25f88d47249f04714633d3f0ec Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 9 Apr 2021 00:15:21 +0200 Subject: [PATCH 28/39] s390/entry: save the caller of psw_idle [ Upstream commit a994eddb947ea9ebb7b14d9a1267001699f0a136 ] Currently psw_idle does not allocate a stack frame and does not save its r14 and r15 into the save area. Even though this is valid from call ABI point of view, because psw_idle does not make any calls explicitly, in reality psw_idle is an entry point for controlled transition into serving interrupts. So, in practice, psw_idle stack frame is analyzed during stack unwinding. Depending on build options that r14 slot in the save area of psw_idle might either contain a value saved by previous sibling call or complete garbage. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x8 <-- pt_regs ([task 0000038000003dd8] 0x0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 So, to make a stacktrace nicer and actually point for the real caller of psw_idle in this frequently occurring case, make psw_idle save its r14. [task 0000038000003c28] do_ext_irq+0xd6/0x160 [task 0000038000003c78] ext_int_handler+0xba/0xe8 [task *0000038000003dd8] psw_idle_exit+0x0/0x6 <-- pt_regs ([task 0000038000003dd8] arch_cpu_idle+0x3c/0xd0) [task 0000038000003e10] default_idle_call+0x42/0x148 [task 0000038000003e30] do_idle+0xce/0x160 [task 0000038000003e70] cpu_startup_entry+0x36/0x40 [task 0000038000003ea0] arch_call_rest_init+0x76/0x80 Reviewed-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kernel/entry.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 71203324ff42..81c458e996d9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -994,6 +994,7 @@ ENDPROC(ext_int_handler) * Load idle PSW. */ ENTRY(psw_idle) + stg %r14,(__SF_GPRS+8*8)(%r15) stg %r3,__SF_EMPTY(%r15) larl %r1,.Lpsw_idle_exit stg %r1,__SF_EMPTY+8(%r15) From 509ae27a1874389182b3709de0940a8f8d4dfb8b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 12 Apr 2021 17:41:01 +0800 Subject: [PATCH 29/39] arm64: kprobes: Restore local irqflag if kprobes is cancelled [ Upstream commit 738fa58ee1328481d1d7889e7c430b3401c571b9 ] If instruction being single stepped caused a page fault, the kprobes is cancelled to let the page fault handler continue as a normal page fault. But the local irqflags are disabled so cpu will restore pstate with DAIF masked. After pagefault is serviced, the kprobes is triggerred again, we overwrite the saved_irqflag by calling kprobes_save_local_irqflag(). NOTE, DAIF is masked in this new saved irqflag. After kprobes is serviced, the cpu pstate is retored with DAIF masked. This patch is inspired by one patch for riscv from Liao Chang. Signed-off-by: Jisheng Zhang Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210412174101.6bfb0594@xhacker.debian Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/kernel/probes/kprobes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f11a1a1f7026..798c3e78b84b 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -286,10 +286,12 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - if (kcb->kprobe_status == KPROBE_REENTER) + if (kcb->kprobe_status == KPROBE_REENTER) { restore_previous_kprobe(kcb); - else + } else { + kprobes_restore_local_irqflag(kcb, regs); reset_current_kprobe(); + } break; case KPROBE_HIT_ACTIVE: From 1bfefd866195987ded82605dc417c1a2ba523bf7 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 13 Apr 2021 16:25:12 +0100 Subject: [PATCH 30/39] xen-netback: Check for hotplug-status existence before watching [ Upstream commit 2afeec08ab5c86ae21952151f726bfe184f6b23d ] The logic in connect() is currently written with the assumption that xenbus_watch_pathfmt() will return an error for a node that does not exist. This assumption is incorrect: xenstore does allow a watch to be registered for a nonexistent node (and will send notifications should the node be subsequently created). As of commit 1f2565780 ("xen-netback: remove 'hotplug-status' once it has served its purpose"), this leads to a failure when a domU transitions into XenbusStateConnected more than once. On the first domU transition into Connected state, the "hotplug-status" node will be deleted by the hotplug_status_changed() callback in dom0. On the second or subsequent domU transition into Connected state, the hotplug_status_changed() callback will therefore never be invoked, and so the backend will remain stuck in InitWait. This failure prevents scenarios such as reloading the xen-netfront module within a domU, or booting a domU via iPXE. There is unfortunately no way for the domU to work around this dom0 bug. Fix by explicitly checking for existence of the "hotplug-status" node, thereby creating the behaviour that was previously assumed to exist. Signed-off-by: Michael Brown Reviewed-by: Paul Durrant Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/xen-netback/xenbus.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6f10e0998f1c..94d19158efc1 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -824,11 +824,15 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, - hotplug_status_changed, - "%s/%s", dev->nodename, "hotplug-status"); - if (!err) + if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + NULL, hotplug_status_changed, + "%s/%s", dev->nodename, + "hotplug-status"); + if (err) + goto err; be->have_hotplug_status_watch = 1; + } netif_tx_wake_all_queues(be->vif->dev); From f2b46286e3260c8f416af8b1360a0d3858db618e Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Wed, 14 Apr 2021 19:31:48 +0800 Subject: [PATCH 31/39] cavium/liquidio: Fix duplicate argument [ Upstream commit 416dcc5ce9d2a810477171c62ffa061a98f87367 ] Fix the following coccicheck warning: ./drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h:413:6-28: duplicated argument to & or | The CN6XXX_INTR_M1UPB0_ERR here is duplicate. Here should be CN6XXX_INTR_M1UNB0_ERR. Signed-off-by: Wan Jiabing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h index b248966837b4..7aad40b2aa73 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h @@ -412,7 +412,7 @@ | CN6XXX_INTR_M0UNWI_ERR \ | CN6XXX_INTR_M1UPB0_ERR \ | CN6XXX_INTR_M1UPWI_ERR \ - | CN6XXX_INTR_M1UPB0_ERR \ + | CN6XXX_INTR_M1UNB0_ERR \ | CN6XXX_INTR_M1UNWI_ERR \ | CN6XXX_INTR_INSTR_DB_OF_ERR \ | CN6XXX_INTR_SLIST_DB_OF_ERR \ From 393200a1b095bb5bae17ed06340f4848f5cc152a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Apr 2021 15:45:57 -0700 Subject: [PATCH 32/39] kasan: fix hwasan build for gcc [ Upstream commit 5c595ac4c776c44b5c59de22ab43b3fe256d9fbb ] gcc-11 adds support for -fsanitize=kernel-hwaddress, so it becomes possible to enable CONFIG_KASAN_SW_TAGS. Unfortunately this fails to build at the moment, because the corresponding command line arguments use llvm specific syntax. Change it to use the cc-param macro instead, which works on both clang and gcc. [elver@google.com: fixup for "kasan: fix hwasan build for gcc"] Link: https://lkml.kernel.org/r/YHQZVfVVLE/LDK2v@elver.google.com Link: https://lkml.kernel.org/r/20210323124112.1229772-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Marco Elver Reviewed-by: Marco Elver Acked-by: Andrey Konovalov Cc: Masahiro Yamada Cc: Michal Marek Cc: Andrey Ryabinin Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- scripts/Makefile.kasan | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 1e000cc2e7b4..127012f45166 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -2,6 +2,8 @@ CFLAGS_KASAN_NOSANITIZE := -fno-builtin KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) +cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -12,8 +14,6 @@ endif CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address -cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) - # -fasan-shadow-offset fails without -fsanitize CFLAGS_KASAN_SHADOW := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET), \ @@ -36,14 +36,14 @@ endif # CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_SW_TAGS ifdef CONFIG_KASAN_INLINE - instrumentation_flags := -mllvm -hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET) + instrumentation_flags := $(call cc-param,hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET)) else - instrumentation_flags := -mllvm -hwasan-instrument-with-calls=1 + instrumentation_flags := $(call cc-param,hwasan-instrument-with-calls=1) endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ - -mllvm -hwasan-instrument-stack=$(CONFIG_KASAN_STACK) \ - -mllvm -hwasan-use-short-granules=0 \ + $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \ + $(call cc-param,hwasan-use-short-granules=0) \ $(instrumentation_flags) endif # CONFIG_KASAN_SW_TAGS From f4a777bcc8d194cdaae1220d35073fe2828dbb3d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:03 -0700 Subject: [PATCH 33/39] csky: change a Kconfig symbol name to fix e1000 build error [ Upstream commit d199161653d612b8fb96ac51bfd5b2d2782ecef3 ] e1000's #define of CONFIG_RAM_BASE conflicts with a Kconfig symbol in arch/csky/Kconfig. The symbol in e1000 has been around longer, so change arch/csky/ to use DRAM_BASE instead of RAM_BASE to remove the conflict. (although e1000 is also a 2-line change) Link: https://lkml.kernel.org/r/20210411055335.7111-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Reported-by: kernel test robot Acked-by: Guo Ren Cc: Jesse Brandeburg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/csky/Kconfig | 2 +- arch/csky/include/asm/page.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 268fad5f51cf..7bf0a617e94c 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -292,7 +292,7 @@ config FORCE_MAX_ZONEORDER int "Maximum zone order" default "11" -config RAM_BASE +config DRAM_BASE hex "DRAM start addr (the same with memory-section in dts)" default 0x0 diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9b98bf31d57c..16878240ef9a 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -28,7 +28,7 @@ #define SSEG_SIZE 0x20000000 #define LOWMEM_LIMIT (SSEG_SIZE * 2) -#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1)) +#define PHYS_OFFSET_OFFSET (CONFIG_DRAM_BASE & (SSEG_SIZE - 1)) #ifndef __ASSEMBLY__ From ba0910ad1c5770ff74b71000b131a7965c373c30 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Apr 2021 15:46:09 -0700 Subject: [PATCH 34/39] ia64: fix discontig.c section mismatches [ Upstream commit e2af9da4f867a1a54f1252bf3abc1a5c63951778 ] Fix IA64 discontig.c Section mismatch warnings. When CONFIG_SPARSEMEM=y and CONFIG_MEMORY_HOTPLUG=y, the functions computer_pernodesize() and scatter_node_data() should not be marked as __meminit because they are needed after init, on any memory hotplug event. Also, early_nr_cpus_node() is called by compute_pernodesize(), so early_nr_cpus_node() cannot be __meminit either. WARNING: modpost: vmlinux.o(.text.unlikely+0x1612): Section mismatch in reference from the function arch_alloc_nodedata() to the function .meminit.text:compute_pernodesize() The function arch_alloc_nodedata() references the function __meminit compute_pernodesize(). This is often because arch_alloc_nodedata lacks a __meminit annotation or the annotation of compute_pernodesize is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1692): Section mismatch in reference from the function arch_refresh_nodedata() to the function .meminit.text:scatter_node_data() The function arch_refresh_nodedata() references the function __meminit scatter_node_data(). This is often because arch_refresh_nodedata lacks a __meminit annotation or the annotation of scatter_node_data is wrong. WARNING: modpost: vmlinux.o(.text.unlikely+0x1502): Section mismatch in reference from the function compute_pernodesize() to the function .meminit.text:early_nr_cpus_node() The function compute_pernodesize() references the function __meminit early_nr_cpus_node(). This is often because compute_pernodesize lacks a __meminit annotation or the annotation of early_nr_cpus_node is wrong. Link: https://lkml.kernel.org/r/20210411001201.3069-1-rdunlap@infradead.org Signed-off-by: Randy Dunlap Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- arch/ia64/mm/discontig.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index dbe829fc5298..4d0813419013 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -94,7 +94,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. Note that node 0 will also count all non-existent cpus. */ -static int __meminit early_nr_cpus_node(int node) +static int early_nr_cpus_node(int node) { int cpu, n = 0; @@ -109,7 +109,7 @@ static int __meminit early_nr_cpus_node(int node) * compute_pernodesize - compute size of pernode data * @node: the node id. */ -static unsigned long __meminit compute_pernodesize(int node) +static unsigned long compute_pernodesize(int node) { unsigned long pernodesize = 0, cpus; @@ -366,7 +366,7 @@ static void __init reserve_pernode_space(void) } } -static void __meminit scatter_node_data(void) +static void scatter_node_data(void) { pg_data_t **dst; int node; From bed21bed2e79eb3687370bec6eaa36c4857c40db Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Fri, 16 Apr 2021 15:46:15 -0700 Subject: [PATCH 35/39] ia64: tools: remove duplicate definition of ia64_mf() on ia64 [ Upstream commit f4bf09dc3aaa4b07cd15630f2023f68cb2668809 ] The ia64_mf() macro defined in tools/arch/ia64/include/asm/barrier.h is already defined in on ia64 which causes libbpf failing to build: CC /usr/src/linux/tools/bpf/bpftool//libbpf/staticobjs/libbpf.o In file included from /usr/src/linux/tools/include/asm/barrier.h:24, from /usr/src/linux/tools/include/linux/ring_buffer.h:4, from libbpf.c:37: /usr/src/linux/tools/include/asm/../../arch/ia64/include/asm/barrier.h:43: error: "ia64_mf" redefined [-Werror] 43 | #define ia64_mf() asm volatile ("mf" ::: "memory") | In file included from /usr/include/ia64-linux-gnu/asm/intrinsics.h:20, from /usr/include/ia64-linux-gnu/asm/swab.h:11, from /usr/include/linux/swab.h:8, from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/ia64-linux-gnu/asm/byteorder.h:5, from /usr/src/linux/tools/include/uapi/linux/perf_event.h:20, from libbpf.c:36: /usr/include/ia64-linux-gnu/asm/gcc_intrin.h:382: note: this is the location of the previous definition 382 | #define ia64_mf() __asm__ volatile ("mf" ::: "memory") | cc1: all warnings being treated as errors Thus, remove the definition from tools/arch/ia64/include/asm/barrier.h. Signed-off-by: John Paul Adrian Glaubitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- tools/arch/ia64/include/asm/barrier.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h index 4d471d9511a5..6fffe5682713 100644 --- a/tools/arch/ia64/include/asm/barrier.h +++ b/tools/arch/ia64/include/asm/barrier.h @@ -39,9 +39,6 @@ * sequential memory pages only. */ -/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */ -#define ia64_mf() asm volatile ("mf" ::: "memory") - #define mb() ia64_mf() #define rmb() mb() #define wmb() mb() From 31720f9e87c032b74de9661e67cfc01414d27052 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 16 Apr 2021 14:02:07 +0200 Subject: [PATCH 36/39] x86/crash: Fix crash_setup_memmap_entries() out-of-bounds access commit 5849cdf8c120e3979c57d34be55b92d90a77a47e upstream. Commit in Fixes: added support for kexec-ing a kernel on panic using a new system call. As part of it, it does prepare a memory map for the new kernel. However, while doing so, it wrongly accesses memory it has not allocated: it accesses the first element of the cmem->ranges[] array in memmap_exclude_ranges() but it has not allocated the memory for it in crash_setup_memmap_entries(). As KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in crash_setup_memmap_entries+0x17e/0x3a0 Write of size 8 at addr ffffc90000426008 by task kexec/1187 (gdb) list *crash_setup_memmap_entries+0x17e 0xffffffff8107cafe is in crash_setup_memmap_entries (arch/x86/kernel/crash.c:322). 317 unsigned long long mend) 318 { 319 unsigned long start, end; 320 321 cmem->ranges[0].start = mstart; 322 cmem->ranges[0].end = mend; 323 cmem->nr_ranges = 1; 324 325 /* Exclude elf header region */ 326 start = image->arch.elf_load_addr; (gdb) Make sure the ranges array becomes a single element allocated. [ bp: Write a proper commit message. ] Fixes: dd5f726076cc ("kexec: support for kexec on panic using new system call") Signed-off-by: Mike Galbraith Signed-off-by: Borislav Petkov Reviewed-by: Dave Young Cc: Link: https://lkml.kernel.org/r/725fa3dc1da2737f0f6188a1a9701bead257ea9d.camel@gmx.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/crash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a8f3af257e26..b1deacbeb266 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -337,7 +337,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) struct crash_memmap_data cmd; struct crash_mem *cmem; - cmem = vzalloc(sizeof(struct crash_mem)); + cmem = vzalloc(struct_size(cmem, ranges, 1)); if (!cmem) return -ENOMEM; From 90642ee9eb581a13569b1c0bd57e85d962215273 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 26 Apr 2021 10:11:49 +0200 Subject: [PATCH 37/39] net: hso: fix NULL-deref on disconnect regression commit 2ad5692db72874f02b9ad551d26345437ea4f7f3 upstream. Commit 8a12f8836145 ("net: hso: fix null-ptr-deref during tty device unregistration") fixed the racy minor allocation reported by syzbot, but introduced an unconditional NULL-pointer dereference on every disconnect instead. Specifically, the serial device table must no longer be accessed after the minor has been released by hso_serial_tty_unregister(). Fixes: 8a12f8836145 ("net: hso: fix null-ptr-deref during tty device unregistration") Cc: stable@vger.kernel.org Cc: Anirudh Rayabharam Reported-by: Leonardo Antoniazzi Signed-off-by: Johan Hovold Reviewed-by: Anirudh Rayabharam Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/hso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index d18642a8144c..4909405803d5 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -3104,7 +3104,7 @@ static void hso_free_interface(struct usb_interface *interface) cancel_work_sync(&serial_table[i]->async_put_intf); cancel_work_sync(&serial_table[i]->async_get_intf); hso_serial_tty_unregister(serial); - kref_put(&serial_table[i]->ref, hso_serial_ref_free); + kref_put(&serial->parent->ref, hso_serial_ref_free); } } From 8a661bad6cee44d897f9840995f2caf81e1fea49 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 21 Apr 2021 09:45:13 +0200 Subject: [PATCH 38/39] USB: CDC-ACM: fix poison/unpoison imbalance commit a8b3b519618f30a87a304c4e120267ce6f8dc68a upstream. suspend() does its poisoning conditionally, resume() does it unconditionally. On a device with combined interfaces this will balance, on a device with two interfaces the counter will go negative and resubmission will fail. Both actions need to be done conditionally. Fixes: 6069e3e927c8f ("USB: cdc-acm: untangle a circular dependency between callback and softint") Signed-off-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20210421074513.4327-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e79359326411..bc035ba6e010 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1637,12 +1637,13 @@ static int acm_resume(struct usb_interface *intf) struct urb *urb; int rv = 0; - acm_unpoison_urbs(acm); spin_lock_irq(&acm->write_lock); if (--acm->susp_count) goto out; + acm_unpoison_urbs(acm); + if (tty_port_initialized(&acm->port)) { rv = usb_submit_urb(acm->ctrlurb, GFP_ATOMIC); From 8bd8301ccc115b7885517077a097ee028fcb1ec2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 28 Apr 2021 13:40:03 +0200 Subject: [PATCH 39/39] Linux 5.10.33 Tested-by: Jon Hunter Tested-by: Fox Chen Tested-By: Patrick McCormick Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Salvatore Bonaccorso Tested-by: Sudip Mukherjee Tested-by: Jason Self Tested-by: Shuah Khan Tested-by: Hulk Robot Tested-by: Linux Kernel Functional Testing Tested-by: Pavel Machek (CIP) Tested-by: Andrei Rabusov Link: https://lore.kernel.org/r/20210426072818.777662399@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index cad90171b4b9..fd5c8b5c013b 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 32 +SUBLEVEL = 33 EXTRAVERSION = NAME = Dare mighty things