From abda366ece483d9f94edecb8cadc3704ec68c434 Mon Sep 17 00:00:00 2001 From: Yann Gautier Date: Mon, 28 Mar 2022 16:51:14 +0200 Subject: [PATCH 01/71] mmc: mmci: stm32: use a buffer for unaligned DMA requests [ Upstream commit 970dc9c11a17994ab878016b536612ab00d1441d ] In SDIO mode, the sg list for requests can be unaligned with what the STM32 SDMMC internal DMA can support. In that case, instead of failing, use a temporary bounce buffer to copy from/to the sg list. This buffer is limited to 1MB. But for that we need to also limit max_req_size to 1MB. It has not shown any throughput penalties for SD-cards or eMMC. Signed-off-by: Yann Gautier Link: https://lore.kernel.org/r/20220328145114.334577-1-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson Stable-dep-of: 6b1ba3f9040b ("mmc: mmci: stm32: fix DMA API overlapping mappings warning") Signed-off-by: Sasha Levin --- drivers/mmc/host/mmci_stm32_sdmmc.c | 88 +++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 17 deletions(-) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 4cceb9bab036..11ae0cb47923 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -43,6 +43,9 @@ struct sdmmc_lli_desc { struct sdmmc_idma { dma_addr_t sg_dma; void *sg_cpu; + dma_addr_t bounce_dma_addr; + void *bounce_buf; + bool use_bounce_buffer; }; struct sdmmc_dlyb { @@ -54,6 +57,8 @@ struct sdmmc_dlyb { static int sdmmc_idma_validate_data(struct mmci_host *host, struct mmc_data *data) { + struct sdmmc_idma *idma = host->dma_priv; + struct device *dev = mmc_dev(host->mmc); struct scatterlist *sg; int i; @@ -61,41 +66,69 @@ static int sdmmc_idma_validate_data(struct mmci_host *host, * idma has constraints on idmabase & idmasize for each element * excepted the last element which has no constraint on idmasize */ + idma->use_bounce_buffer = false; for_each_sg(data->sg, sg, data->sg_len - 1, i) { if (!IS_ALIGNED(sg->offset, sizeof(u32)) || !IS_ALIGNED(sg->length, SDMMC_IDMA_BURST)) { - dev_err(mmc_dev(host->mmc), + dev_dbg(mmc_dev(host->mmc), "unaligned scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); - return -EINVAL; + goto use_bounce_buffer; } } if (!IS_ALIGNED(sg->offset, sizeof(u32))) { - dev_err(mmc_dev(host->mmc), + dev_dbg(mmc_dev(host->mmc), "unaligned last scatterlist: ofst:%x length:%d\n", data->sg->offset, data->sg->length); - return -EINVAL; + goto use_bounce_buffer; } + return 0; + +use_bounce_buffer: + if (!idma->bounce_buf) { + idma->bounce_buf = dmam_alloc_coherent(dev, + host->mmc->max_req_size, + &idma->bounce_dma_addr, + GFP_KERNEL); + if (!idma->bounce_buf) { + dev_err(dev, "Unable to map allocate DMA bounce buffer.\n"); + return -ENOMEM; + } + } + + idma->use_bounce_buffer = true; + return 0; } static int _sdmmc_idma_prep_data(struct mmci_host *host, struct mmc_data *data) { - int n_elem; + struct sdmmc_idma *idma = host->dma_priv; - n_elem = dma_map_sg(mmc_dev(host->mmc), - data->sg, - data->sg_len, - mmc_get_dma_dir(data)); + if (idma->use_bounce_buffer) { + if (data->flags & MMC_DATA_WRITE) { + unsigned int xfer_bytes = data->blksz * data->blocks; - if (!n_elem) { - dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n"); - return -EINVAL; + sg_copy_to_buffer(data->sg, data->sg_len, + idma->bounce_buf, xfer_bytes); + dma_wmb(); + } + } else { + int n_elem; + + n_elem = dma_map_sg(mmc_dev(host->mmc), + data->sg, + data->sg_len, + mmc_get_dma_dir(data)); + + if (!n_elem) { + dev_err(mmc_dev(host->mmc), "dma_map_sg failed\n"); + return 
-EINVAL; + } } - return 0; } @@ -112,8 +145,19 @@ static int sdmmc_idma_prep_data(struct mmci_host *host, static void sdmmc_idma_unprep_data(struct mmci_host *host, struct mmc_data *data, int err) { - dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - mmc_get_dma_dir(data)); + struct sdmmc_idma *idma = host->dma_priv; + + if (idma->use_bounce_buffer) { + if (data->flags & MMC_DATA_READ) { + unsigned int xfer_bytes = data->blksz * data->blocks; + + sg_copy_from_buffer(data->sg, data->sg_len, + idma->bounce_buf, xfer_bytes); + } + } else { + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); + } } static int sdmmc_idma_setup(struct mmci_host *host) @@ -137,6 +181,8 @@ static int sdmmc_idma_setup(struct mmci_host *host) host->mmc->max_segs = SDMMC_LLI_BUF_LEN / sizeof(struct sdmmc_lli_desc); host->mmc->max_seg_size = host->variant->stm32_idmabsize_mask; + + host->mmc->max_req_size = SZ_1M; } else { host->mmc->max_segs = 1; host->mmc->max_seg_size = host->mmc->max_req_size; @@ -154,8 +200,16 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; - if (!host->variant->dma_lli || data->sg_len == 1) { - writel_relaxed(sg_dma_address(data->sg), + if (!host->variant->dma_lli || data->sg_len == 1 || + idma->use_bounce_buffer) { + u32 dma_addr; + + if (idma->use_bounce_buffer) + dma_addr = idma->bounce_dma_addr; + else + dma_addr = sg_dma_address(data->sg); + + writel_relaxed(dma_addr, host->base + MMCI_STM32_IDMABASE0R); writel_relaxed(MMCI_STM32_IDMAEN, host->base + MMCI_STM32_IDMACTRLR); From 0224cbc53ba82b84affa7619b6d1b1a254bc2c53 Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Wed, 7 Feb 2024 15:39:51 +0100 Subject: [PATCH 02/71] mmc: mmci: stm32: fix DMA API overlapping mappings warning [ Upstream commit 6b1ba3f9040be5efc4396d86c9752cdc564730be ] Turning on CONFIG_DMA_API_DEBUG_SG results in the following warning: DMA-API: mmci-pl18x 48220000.mmc: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 1 PID: 51 at kernel/dma/debug.c:568 add_dma_entry+0x234/0x2f4 Modules linked in: CPU: 1 PID: 51 Comm: kworker/1:2 Not tainted 6.1.28 #1 Hardware name: STMicroelectronics STM32MP257F-EV1 Evaluation Board (DT) Workqueue: events_freezable mmc_rescan Call trace: add_dma_entry+0x234/0x2f4 debug_dma_map_sg+0x198/0x350 __dma_map_sg_attrs+0xa0/0x110 dma_map_sg_attrs+0x10/0x2c sdmmc_idma_prep_data+0x80/0xc0 mmci_prep_data+0x38/0x84 mmci_start_data+0x108/0x2dc mmci_request+0xe4/0x190 __mmc_start_request+0x68/0x140 mmc_start_request+0x94/0xc0 mmc_wait_for_req+0x70/0x100 mmc_send_tuning+0x108/0x1ac sdmmc_execute_tuning+0x14c/0x210 mmc_execute_tuning+0x48/0xec mmc_sd_init_uhs_card.part.0+0x208/0x464 mmc_sd_init_card+0x318/0x89c mmc_attach_sd+0xe4/0x180 mmc_rescan+0x244/0x320 DMA API debug brings to light leaking dma-mappings as dma_map_sg and dma_unmap_sg are not correctly balanced. If an error occurs in mmci_cmd_irq function, only mmci_dma_error function is called and as this API is not managed on stm32 variant, dma_unmap_sg is never called in this error path. 
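For context, the DMA API rule being violated is that every buffer successfully mapped with dma_map_sg() must be unmapped exactly once with dma_unmap_sg(), on the error path as well as on completion, before it may be mapped again. Below is a minimal sketch of that pairing; the function names and structure are illustrative only and are not taken from the driver:

  #include <linux/dma-mapping.h>
  #include <linux/scatterlist.h>

  /* Map once before starting the transfer. */
  static int example_prep(struct device *dev, struct scatterlist *sg,
                          int nents, enum dma_data_direction dir)
  {
          return dma_map_sg(dev, sg, nents, dir) ? 0 : -EINVAL;
  }

  /*
   * Unmap exactly once, on both the completion path and the error path.
   * Re-mapping a scatterlist that was never unmapped is what triggers the
   * "overlapping mappings" warning from CONFIG_DMA_API_DEBUG_SG.
   */
  static void example_unprep(struct device *dev, struct scatterlist *sg,
                             int nents, enum dma_data_direction dir)
  {
          dma_unmap_sg(dev, sg, nents, dir);
  }

The patch below restores this balance for the stm32 variant by adding a dma_error callback (sdmmc_idma_error) that unmaps the scatterlist on the error path.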
Signed-off-by: Christophe Kerello Fixes: 46b723dd867d ("mmc: mmci: add stm32 sdmmc variant") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240207143951.938144-1-christophe.kerello@foss.st.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/mmci_stm32_sdmmc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 11ae0cb47923..e3201a621870 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -200,6 +200,8 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) struct scatterlist *sg; int i; + host->dma_in_progress = true; + if (!host->variant->dma_lli || data->sg_len == 1 || idma->use_bounce_buffer) { u32 dma_addr; @@ -238,9 +240,30 @@ static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) return 0; } +static void sdmmc_idma_error(struct mmci_host *host) +{ + struct mmc_data *data = host->data; + struct sdmmc_idma *idma = host->dma_priv; + + if (!dma_inprogress(host)) + return; + + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; + data->host_cookie = 0; + + if (!idma->use_bounce_buffer) + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + mmc_get_dma_dir(data)); +} + static void sdmmc_idma_finalize(struct mmci_host *host, struct mmc_data *data) { + if (!dma_inprogress(host)) + return; + writel_relaxed(0, host->base + MMCI_STM32_IDMACTRLR); + host->dma_in_progress = false; if (!data->host_cookie) sdmmc_idma_unprep_data(host, data, 0); @@ -566,6 +589,7 @@ static struct mmci_host_ops sdmmc_variant_ops = { .dma_setup = sdmmc_idma_setup, .dma_start = sdmmc_idma_start, .dma_finalize = sdmmc_idma_finalize, + .dma_error = sdmmc_idma_error, .set_clkreg = mmci_sdmmc_set_clkreg, .set_pwrreg = mmci_sdmmc_set_pwrreg, .busy_complete = sdmmc_busy_complete, From 061336268eb962100ceb0db81312ee2b5f319ac8 Mon Sep 17 00:00:00 2001 From: John Efstathiades Date: Tue, 24 Aug 2021 19:56:04 +0100 Subject: [PATCH 03/71] lan78xx: Fix white space and style issues [ Upstream commit 9ceec7d33adf9647293f24d2fd9a055b89c63864 ] Fix white space and code style issues identified by checkpatch. Signed-off-by: John Efstathiades Signed-off-by: David S. 
Miller Stable-dep-of: 1eecc7ab82c4 ("net: lan78xx: fix runtime PM count underflow on link stop") Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 80 ++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c5a666bb86ee..00424f337552 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -385,7 +385,7 @@ struct lan78xx_net { struct usb_anchor deferred; struct mutex phy_mutex; /* for phy access */ - unsigned pipe_in, pipe_out, pipe_intr; + unsigned int pipe_in, pipe_out, pipe_intr; u32 hard_mtu; /* count any extra framing */ size_t rx_urb_size; /* size for rx urbs */ @@ -395,7 +395,7 @@ struct lan78xx_net { wait_queue_head_t *wait; unsigned char suspend_count; - unsigned maxpacket; + unsigned int maxpacket; struct timer_list delay; struct timer_list stat_monitor; @@ -504,7 +504,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev, if (likely(ret >= 0)) { src = (u32 *)stats; dst = (u32 *)data; - for (i = 0; i < sizeof(*stats)/sizeof(u32); i++) { + for (i = 0; i < sizeof(*stats) / sizeof(u32); i++) { le32_to_cpus(&src[i]); dst[i] = src[i]; } @@ -518,10 +518,11 @@ static int lan78xx_read_stats(struct lan78xx_net *dev, return ret; } -#define check_counter_rollover(struct1, dev_stats, member) { \ - if (struct1->member < dev_stats.saved.member) \ - dev_stats.rollover_count.member++; \ - } +#define check_counter_rollover(struct1, dev_stats, member) \ + do { \ + if ((struct1)->member < (dev_stats).saved.member) \ + (dev_stats).rollover_count.member++; \ + } while (0) static void lan78xx_check_stat_rollover(struct lan78xx_net *dev, struct lan78xx_statstage *stats) @@ -847,9 +848,9 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, for (i = 0; i < length; i++) { lan78xx_write_reg(dev, OTP_ADDR1, - ((offset + i) >> 8) & OTP_ADDR1_15_11); + ((offset + i) >> 8) & OTP_ADDR1_15_11); lan78xx_write_reg(dev, OTP_ADDR2, - ((offset + i) & OTP_ADDR2_10_3)); + ((offset + i) & OTP_ADDR2_10_3)); lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); @@ -903,9 +904,9 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, for (i = 0; i < length; i++) { lan78xx_write_reg(dev, OTP_ADDR1, - ((offset + i) >> 8) & OTP_ADDR1_15_11); + ((offset + i) >> 8) & OTP_ADDR1_15_11); lan78xx_write_reg(dev, OTP_ADDR2, - ((offset + i) & OTP_ADDR2_10_3)); + ((offset + i) & OTP_ADDR2_10_3)); lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); @@ -962,7 +963,7 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev) usleep_range(40, 100); } - netdev_warn(dev->net, "lan78xx_dataport_wait_not_busy timed out"); + netdev_warn(dev->net, "%s timed out", __func__); return -EIO; } @@ -975,7 +976,7 @@ static int lan78xx_dataport_write(struct lan78xx_net *dev, u32 ram_select, int i, ret; if (usb_autopm_get_interface(dev->intf) < 0) - return 0; + return 0; mutex_lock(&pdata->dataport_mutex); @@ -1048,9 +1049,9 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param) for (i = 1; i < NUM_OF_MAF; i++) { lan78xx_write_reg(dev, MAF_HI(i), 0); lan78xx_write_reg(dev, MAF_LO(i), - pdata->pfilter_table[i][1]); + pdata->pfilter_table[i][1]); lan78xx_write_reg(dev, MAF_HI(i), - pdata->pfilter_table[i][0]); + pdata->pfilter_table[i][0]); } lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); @@ 
-1069,11 +1070,12 @@ static void lan78xx_set_multicast(struct net_device *netdev) RFE_CTL_DA_PERFECT_ | RFE_CTL_MCAST_HASH_); for (i = 0; i < DP_SEL_VHF_HASH_LEN; i++) - pdata->mchash_table[i] = 0; + pdata->mchash_table[i] = 0; + /* pfilter_table[0] has own HW address */ for (i = 1; i < NUM_OF_MAF; i++) { - pdata->pfilter_table[i][0] = - pdata->pfilter_table[i][1] = 0; + pdata->pfilter_table[i][0] = 0; + pdata->pfilter_table[i][1] = 0; } pdata->rfe_ctl |= RFE_CTL_BCAST_EN_; @@ -1267,9 +1269,10 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb) generic_handle_irq(dev->domain_data.phyirq); local_irq_enable(); } - } else + } else { netdev_warn(dev->net, "unexpected interrupt: 0x%08x\n", intdata); + } } static int lan78xx_ethtool_get_eeprom_len(struct net_device *netdev) @@ -1358,7 +1361,7 @@ static void lan78xx_get_wol(struct net_device *netdev, struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); if (usb_autopm_get_interface(dev->intf) < 0) - return; + return; ret = lan78xx_read_reg(dev, USB_CFG0, &buf); if (unlikely(ret < 0)) { @@ -1980,7 +1983,7 @@ static int lan8835_fixup(struct phy_device *phydev) /* RGMII MAC TXC Delay Enable */ lan78xx_write_reg(dev, MAC_RGMII_ID, - MAC_RGMII_ID_TXC_DELAY_EN_); + MAC_RGMII_ID_TXC_DELAY_EN_); /* RGMII TX DLL Tune Adjust */ lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); @@ -3334,9 +3337,10 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) if (skb) dev_kfree_skb_any(skb); usb_free_urb(urb); - } else + } else { netif_dbg(dev, tx_queued, dev->net, "> tx, len %d, type 0x%x\n", length, skb->protocol); + } } static void lan78xx_rx_bh(struct lan78xx_net *dev) @@ -3437,7 +3441,7 @@ static void lan78xx_delayedwork(struct work_struct *work) unlink_urbs(dev, &dev->rxq); status = usb_autopm_get_interface(dev->intf); if (status < 0) - goto fail_halt; + goto fail_halt; status = usb_clear_halt(dev->udev, dev->pipe_in); usb_autopm_put_interface(dev->intf); if (status < 0 && @@ -3610,8 +3614,8 @@ static int lan78xx_probe(struct usb_interface *intf, struct net_device *netdev; struct usb_device *udev; int ret; - unsigned maxp; - unsigned period; + unsigned int maxp; + unsigned int period; u8 *buf = NULL; udev = interface_to_usbdev(intf); @@ -3842,10 +3846,10 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) /* set WUF_CFG & WUF_MASK for IPv4 Multicast */ crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3); lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_MCAST_ | - (0 << WUF_CFGX_OFFSET_SHIFT_) | - (crc & WUF_CFGX_CRC16_MASK_)); + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); @@ -3856,10 +3860,10 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) /* for IPv6 Multicast */ crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2); lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_MCAST_ | - (0 << WUF_CFGX_OFFSET_SHIFT_) | - (crc & WUF_CFGX_CRC16_MASK_)); + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); @@ -3886,10 +3890,10 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) */ crc = lan78xx_wakeframe_crc16(arp_type, 2); lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_ALL_ | - (0 << 
WUF_CFGX_OFFSET_SHIFT_) | - (crc & WUF_CFGX_CRC16_MASK_)); + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_ALL_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); @@ -4034,7 +4038,7 @@ static int lan78xx_resume(struct usb_interface *intf) if (!--dev->suspend_count) { /* resume interrupt URBs */ if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags)) - usb_submit_urb(dev->urb_intr, GFP_NOIO); + usb_submit_urb(dev->urb_intr, GFP_NOIO); spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { From e5d7f43c4cdd46a4d42dd3c1c971541c1cf2c862 Mon Sep 17 00:00:00 2001 From: John Efstathiades Date: Tue, 24 Aug 2021 19:56:08 +0100 Subject: [PATCH 04/71] lan78xx: Add missing return code checks [ Upstream commit 3415f6baaddb9b39d7112247ab39ef3c700f882e ] There are many places in the driver where the return code from a function call is captured but without a subsequent test of the return code and appropriate action taken. This patch adds the missing return code tests and action. In most cases the action is an early exit from the calling function. The function lan78xx_set_suspend() was also updated to make it consistent with lan78xx_suspend(). Signed-off-by: John Efstathiades Signed-off-by: David S. Miller Stable-dep-of: 1eecc7ab82c4 ("net: lan78xx: fix runtime PM count underflow on link stop") Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 395 ++++++++++++++++++++++++++++++++------ 1 file changed, 331 insertions(+), 64 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 00424f337552..90d6e6c6d0f3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1165,7 +1165,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) /* clear LAN78xx interrupt status */ ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_); if (unlikely(ret < 0)) - return -EIO; + return ret; mutex_lock(&phydev->lock); phy_read_status(phydev); @@ -1178,11 +1178,11 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) /* reset MAC */ ret = lan78xx_read_reg(dev, MAC_CR, &buf); if (unlikely(ret < 0)) - return -EIO; + return ret; buf |= MAC_CR_RST_; ret = lan78xx_write_reg(dev, MAC_CR, buf); if (unlikely(ret < 0)) - return -EIO; + return ret; del_timer(&dev->stat_monitor); } else if (link && !dev->link_on) { @@ -1194,18 +1194,30 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (ecmd.base.speed == 1000) { /* disable U2 */ ret = lan78xx_read_reg(dev, USB_CFG1, &buf); + if (ret < 0) + return ret; buf &= ~USB_CFG1_DEV_U2_INIT_EN_; ret = lan78xx_write_reg(dev, USB_CFG1, buf); + if (ret < 0) + return ret; /* enable U1 */ ret = lan78xx_read_reg(dev, USB_CFG1, &buf); + if (ret < 0) + return ret; buf |= USB_CFG1_DEV_U1_INIT_EN_; ret = lan78xx_write_reg(dev, USB_CFG1, buf); + if (ret < 0) + return ret; } else { /* enable U1 & U2 */ ret = lan78xx_read_reg(dev, USB_CFG1, &buf); + if (ret < 0) + return ret; buf |= USB_CFG1_DEV_U2_INIT_EN_; buf |= USB_CFG1_DEV_U1_INIT_EN_; ret = lan78xx_write_reg(dev, USB_CFG1, buf); + if (ret < 0) + return ret; } } @@ -1223,6 +1235,8 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) ret = lan78xx_update_flowcontrol(dev, ecmd.base.duplex, ladv, radv); + if (ret < 0) + return ret; if (!timer_pending(&dev->stat_monitor)) { dev->delta = 1; @@ -1233,7 +1247,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) tasklet_schedule(&dev->bh); } - return ret; + return 0; } /* some work can't 
be done in tasklets, so we use keventd @@ -2426,23 +2440,33 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev) static int lan78xx_reset(struct lan78xx_net *dev) { struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); - u32 buf; - int ret = 0; unsigned long timeout; + int ret; + u32 buf; u8 sig; ret = lan78xx_read_reg(dev, HW_CFG, &buf); + if (ret < 0) + return ret; + buf |= HW_CFG_LRST_; + ret = lan78xx_write_reg(dev, HW_CFG, buf); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { mdelay(1); ret = lan78xx_read_reg(dev, HW_CFG, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on completion of LiteReset"); - return -EIO; + ret = -ETIMEDOUT; + return ret; } } while (buf & HW_CFG_LRST_); @@ -2450,13 +2474,22 @@ static int lan78xx_reset(struct lan78xx_net *dev) /* save DEVID for later usage */ ret = lan78xx_read_reg(dev, ID_REV, &buf); + if (ret < 0) + return ret; + dev->chipid = (buf & ID_REV_CHIP_ID_MASK_) >> 16; dev->chiprev = buf & ID_REV_CHIP_REV_MASK_; /* Respond to the IN token with a NAK */ ret = lan78xx_read_reg(dev, USB_CFG0, &buf); + if (ret < 0) + return ret; + buf |= USB_CFG_BIR_; + ret = lan78xx_write_reg(dev, USB_CFG0, buf); + if (ret < 0) + return ret; /* Init LTM */ lan78xx_init_ltm(dev); @@ -2479,53 +2512,105 @@ static int lan78xx_reset(struct lan78xx_net *dev) } ret = lan78xx_write_reg(dev, BURST_CAP, buf); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, BULK_IN_DLY, DEFAULT_BULK_IN_DELAY); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, HW_CFG, &buf); + if (ret < 0) + return ret; + buf |= HW_CFG_MEF_; + ret = lan78xx_write_reg(dev, HW_CFG, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, USB_CFG0, &buf); + if (ret < 0) + return ret; + buf |= USB_CFG_BCE_; + ret = lan78xx_write_reg(dev, USB_CFG0, buf); + if (ret < 0) + return ret; /* set FIFO sizes */ buf = (MAX_RX_FIFO_SIZE - 512) / 512; + ret = lan78xx_write_reg(dev, FCT_RX_FIFO_END, buf); + if (ret < 0) + return ret; buf = (MAX_TX_FIFO_SIZE - 512) / 512; + ret = lan78xx_write_reg(dev, FCT_TX_FIFO_END, buf); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, INT_STS, INT_STS_CLEAR_ALL_); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, FLOW, 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, FCT_FLOW, 0); + if (ret < 0) + return ret; /* Don't need rfe_ctl_lock during initialisation */ ret = lan78xx_read_reg(dev, RFE_CTL, &pdata->rfe_ctl); + if (ret < 0) + return ret; + pdata->rfe_ctl |= RFE_CTL_BCAST_EN_ | RFE_CTL_DA_PERFECT_; + ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + if (ret < 0) + return ret; /* Enable or disable checksum offload engines */ - lan78xx_set_features(dev->net, dev->net->features); + ret = lan78xx_set_features(dev->net, dev->net->features); + if (ret < 0) + return ret; lan78xx_set_multicast(dev->net); /* reset PHY */ ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; + buf |= PMT_CTL_PHY_RST_; + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { mdelay(1); ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout waiting for PHY Reset"); - return -EIO; + ret = -ETIMEDOUT; + return ret; } } while ((buf & PMT_CTL_PHY_RST_) || !(buf & PMT_CTL_READY_)); ret = lan78xx_read_reg(dev, MAC_CR, &buf); + if (ret < 0) + return ret; + /* LAN7801 only has RGMII mode */ if (dev->chipid == 
ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; @@ -2540,25 +2625,53 @@ static int lan78xx_reset(struct lan78xx_net *dev) } } ret = lan78xx_write_reg(dev, MAC_CR, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, MAC_TX, &buf); + if (ret < 0) + return ret; + buf |= MAC_TX_TXEN_; + ret = lan78xx_write_reg(dev, MAC_TX, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, FCT_TX_CTL, &buf); + if (ret < 0) + return ret; + buf |= FCT_TX_CTL_EN_; + ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf); + if (ret < 0) + return ret; ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + VLAN_ETH_HLEN); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + buf |= MAC_RX_RXEN_; + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, FCT_RX_CTL, &buf); + if (ret < 0) + return ret; + buf |= FCT_RX_CTL_EN_; + ret = lan78xx_write_reg(dev, FCT_RX_CTL, buf); + if (ret < 0) + return ret; return 0; } @@ -2596,7 +2709,7 @@ static int lan78xx_open(struct net_device *net) ret = usb_autopm_get_interface(dev->intf); if (ret < 0) - goto out; + return ret; phy_start(net->phydev); @@ -2624,7 +2737,6 @@ static int lan78xx_open(struct net_device *net) done: usb_autopm_put_interface(dev->intf); -out: return ret; } @@ -3788,35 +3900,62 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len) static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) { - u32 buf; - int mask_index; - u16 crc; - u32 temp_wucsr; - u32 temp_pmt_ctl; const u8 ipv4_multicast[3] = { 0x01, 0x00, 0x5E }; const u8 ipv6_multicast[3] = { 0x33, 0x33 }; const u8 arp_type[2] = { 0x08, 0x06 }; + u32 temp_pmt_ctl; + int mask_index; + u32 temp_wucsr; + u32 buf; + u16 crc; + int ret; + + ret = lan78xx_read_reg(dev, MAC_TX, &buf); + if (ret < 0) + return ret; - lan78xx_read_reg(dev, MAC_TX, &buf); buf &= ~MAC_TX_TXEN_; - lan78xx_write_reg(dev, MAC_TX, buf); - lan78xx_read_reg(dev, MAC_RX, &buf); - buf &= ~MAC_RX_RXEN_; - lan78xx_write_reg(dev, MAC_RX, buf); - lan78xx_write_reg(dev, WUCSR, 0); - lan78xx_write_reg(dev, WUCSR2, 0); - lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + ret = lan78xx_write_reg(dev, MAC_TX, buf); + if (ret < 0) + return ret; + + ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + + buf &= ~MAC_RX_RXEN_; + + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, WUCSR, 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUCSR2, 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + if (ret < 0) + return ret; temp_wucsr = 0; temp_pmt_ctl = 0; - lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); + + ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); + if (ret < 0) + return ret; + temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_; temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_; - for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) - lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); + for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) { + ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); + if (ret < 0) + return ret; + } mask_index = 0; if (wol & WAKE_PHY) { @@ -3845,30 +3984,52 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) /* set WUF_CFG & WUF_MASK for IPv4 Multicast */ crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3); - lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_MCAST_ | - (0 << WUF_CFGX_OFFSET_SHIFT_) | - (crc & 
WUF_CFGX_CRC16_MASK_)); + ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + if (ret < 0) + return ret; - lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); - lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; /* for IPv6 Multicast */ crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2); - lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_MCAST_ | - (0 << WUF_CFGX_OFFSET_SHIFT_) | - (crc & WUF_CFGX_CRC16_MASK_)); + ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + if (ret < 0) + return ret; - lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); - lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; temp_pmt_ctl |= PMT_CTL_WOL_EN_; @@ -3889,16 +4050,27 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) * for packettype (offset 12,13) = ARP (0x0806) */ crc = lan78xx_wakeframe_crc16(arp_type, 2); - lan78xx_write_reg(dev, WUF_CFG(mask_index), - WUF_CFGX_EN_ | - WUF_CFGX_TYPE_ALL_ | - (0 << WUF_CFGX_OFFSET_SHIFT_) | - (crc & WUF_CFGX_CRC16_MASK_)); + ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_ALL_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + if (ret < 0) + return ret; - lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); - lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); - lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); mask_index++; temp_pmt_ctl |= PMT_CTL_WOL_EN_; @@ -3906,7 +4078,9 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; } - lan78xx_write_reg(dev, WUCSR, temp_wucsr); + ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr); + if (ret < 0) + return ret; /* when multiple WOL bits are set */ if (hweight_long((unsigned long)wol) > 1) { @@ -3914,16 +4088,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_; temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; } - lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); + ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); 
+ if (ret < 0) + return ret; /* clear WUPS */ - lan78xx_read_reg(dev, PMT_CTL, &buf); + ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; + buf |= PMT_CTL_WUPS_MASK_; - lan78xx_write_reg(dev, PMT_CTL, buf); + + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + buf |= MAC_RX_RXEN_; + lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; return 0; } @@ -3931,7 +4119,6 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) { struct lan78xx_net *dev = usb_get_intfdata(intf); - struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); u32 buf; int ret; @@ -3951,11 +4138,24 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) /* stop TX & RX */ ret = lan78xx_read_reg(dev, MAC_TX, &buf); + if (ret < 0) + return ret; + buf &= ~MAC_TX_TXEN_; + ret = lan78xx_write_reg(dev, MAC_TX, buf); + if (ret < 0) + return ret; + ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + buf &= ~MAC_RX_RXEN_; + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; /* empty out the rx and queues */ netif_device_detach(dev->net); @@ -3972,25 +4172,50 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) if (PMSG_IS_AUTO(message)) { /* auto suspend (selective suspend) */ ret = lan78xx_read_reg(dev, MAC_TX, &buf); + if (ret < 0) + return ret; + buf &= ~MAC_TX_TXEN_; + ret = lan78xx_write_reg(dev, MAC_TX, buf); + if (ret < 0) + return ret; + ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + buf &= ~MAC_RX_RXEN_; + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WUCSR, 0); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WUCSR2, 0); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + if (ret < 0) + return ret; /* set goodframe wakeup */ ret = lan78xx_read_reg(dev, WUCSR, &buf); + if (ret < 0) + return ret; buf |= WUCSR_RFE_WAKE_EN_; buf |= WUCSR_STORE_WAKE_; ret = lan78xx_write_reg(dev, WUCSR, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; buf &= ~PMT_CTL_RES_CLR_WKP_EN_; buf |= PMT_CTL_RES_CLR_WKP_STS_; @@ -4001,18 +4226,36 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) buf |= PMT_CTL_SUS_MODE_3_; ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; buf |= PMT_CTL_WUPS_MASK_; ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; + buf |= MAC_RX_RXEN_; + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; } else { - lan78xx_set_suspend(dev, pdata->wol); + struct lan78xx_priv *pdata; + + pdata = (struct lan78xx_priv *)(dev->data[0]); + + ret = lan78xx_set_suspend(dev, pdata->wol); + if (ret < 0) + return ret; } } @@ -4037,8 +4280,11 @@ static int lan78xx_resume(struct usb_interface *intf) if (!--dev->suspend_count) { /* resume interrupt URBs */ - if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags)) - usb_submit_urb(dev->urb_intr, GFP_NOIO); + if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags)) { + ret = usb_submit_urb(dev->urb_intr, GFP_NOIO); + if (ret < 0) + return ret; + } 
spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { @@ -4065,13 +4311,21 @@ static int lan78xx_resume(struct usb_interface *intf) } ret = lan78xx_write_reg(dev, WUCSR2, 0); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WUCSR, 0); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WUCSR2, WUCSR2_NS_RCD_ | WUCSR2_ARP_RCD_ | WUCSR2_IPV6_TCPSYN_RCD_ | WUCSR2_IPV4_TCPSYN_RCD_); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, WUCSR, WUCSR_EEE_TX_WAKE_ | WUCSR_EEE_RX_WAKE_ | @@ -4080,10 +4334,18 @@ static int lan78xx_resume(struct usb_interface *intf) WUCSR_WUFR_ | WUCSR_MPR_ | WUCSR_BCST_FR_); + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, MAC_TX, &buf); + if (ret < 0) + return ret; + buf |= MAC_TX_TXEN_; + ret = lan78xx_write_reg(dev, MAC_TX, buf); + if (ret < 0) + return ret; return 0; } @@ -4091,12 +4353,17 @@ static int lan78xx_resume(struct usb_interface *intf) static int lan78xx_reset_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); + int ret; - lan78xx_reset(dev); + ret = lan78xx_reset(dev); + if (ret < 0) + return ret; phy_start(dev->net->phydev); - return lan78xx_resume(intf); + ret = lan78xx_resume(intf); + + return ret; } static const struct usb_device_id products[] = { From 69215f8edae38a7d43ef9537b232c41a3a5fc2f4 Mon Sep 17 00:00:00 2001 From: John Efstathiades Date: Tue, 24 Aug 2021 19:56:10 +0100 Subject: [PATCH 05/71] lan78xx: Fix partial packet errors on suspend/resume [ Upstream commit e1210fe63bf8b080edd0805240e90b81b6b069c1 ] The MAC can get out of step with the internal packet FIFOs if the system goes to sleep when the link is active, especially at high data rates. This can result in partial frames in the packet FIFOs that in result in malformed frames being delivered to the host. This occurs because the driver does not enable/disable the internal packet FIFOs in step with the corresponding MAC data path. The following changes fix this problem. Update code that enables/disables the MAC receiver and transmitter to the more general Rx and Tx data path, where the data path in each direction consists of both the MAC function (Tx or Rx) and the corresponding packet FIFO. In the receive path the packet FIFO must be enabled before the MAC receiver but disabled after the MAC receiver. In the transmit path the opposite is true: the packet FIFO must be enabled after the MAC transmitter but disabled before the MAC transmitter. The packet FIFOs can be flushed safely once the corresponding data path is stopped. Signed-off-by: John Efstathiades Signed-off-by: David S. 
Miller Stable-dep-of: 1eecc7ab82c4 ("net: lan78xx: fix runtime PM count underflow on link stop") Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 303 +++++++++++++++++++++++++------------- 1 file changed, 197 insertions(+), 106 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 90d6e6c6d0f3..18269651e84c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -90,6 +90,12 @@ /* statistic update interval (mSec) */ #define STAT_UPDATE_TIMER (1 * 1000) +/* time to wait for MAC or FCT to stop (jiffies) */ +#define HW_DISABLE_TIMEOUT (HZ / 10) + +/* time to wait between polling MAC or FCT state (ms) */ +#define HW_DISABLE_DELAY_MS 1 + /* defines interrupts from interrupt EP */ #define MAX_INT_EP (32) #define INT_EP_INTEP (31) @@ -479,6 +485,26 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) return ret; } +static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask, + u32 data) +{ + int ret; + u32 buf; + + ret = lan78xx_read_reg(dev, reg, &buf); + if (ret < 0) + return ret; + + buf &= ~mask; + buf |= (mask & data); + + ret = lan78xx_write_reg(dev, reg, buf); + if (ret < 0) + return ret; + + return 0; +} + static int lan78xx_read_stats(struct lan78xx_net *dev, struct lan78xx_statstage *data) { @@ -2437,6 +2463,156 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev) lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]); } +static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable) +{ + return lan78xx_update_reg(dev, reg, hw_enable, hw_enable); +} + +static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled, + u32 hw_disabled) +{ + unsigned long timeout; + bool stopped = true; + int ret; + u32 buf; + + /* Stop the h/w block (if not already stopped) */ + + ret = lan78xx_read_reg(dev, reg, &buf); + if (ret < 0) + return ret; + + if (buf & hw_enabled) { + buf &= ~hw_enabled; + + ret = lan78xx_write_reg(dev, reg, buf); + if (ret < 0) + return ret; + + stopped = false; + timeout = jiffies + HW_DISABLE_TIMEOUT; + do { + ret = lan78xx_read_reg(dev, reg, &buf); + if (ret < 0) + return ret; + + if (buf & hw_disabled) + stopped = true; + else + msleep(HW_DISABLE_DELAY_MS); + } while (!stopped && !time_after(jiffies, timeout)); + } + + ret = stopped ? 0 : -ETIME; + + return ret; +} + +static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush) +{ + return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush); +} + +static int lan78xx_start_tx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "start tx path"); + + /* Start the MAC transmitter */ + + ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_); + if (ret < 0) + return ret; + + /* Start the Tx FIFO */ + + ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_); + if (ret < 0) + return ret; + + return 0; +} + +static int lan78xx_stop_tx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "stop tx path"); + + /* Stop the Tx FIFO */ + + ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_); + if (ret < 0) + return ret; + + /* Stop the MAC transmitter */ + + ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_); + if (ret < 0) + return ret; + + return 0; +} + +/* The caller must ensure the Tx path is stopped before calling + * lan78xx_flush_tx_fifo(). 
+ */ +static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev) +{ + return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_); +} + +static int lan78xx_start_rx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "start rx path"); + + /* Start the Rx FIFO */ + + ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_); + if (ret < 0) + return ret; + + /* Start the MAC receiver*/ + + ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_); + if (ret < 0) + return ret; + + return 0; +} + +static int lan78xx_stop_rx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "stop rx path"); + + /* Stop the MAC receiver */ + + ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_); + if (ret < 0) + return ret; + + /* Stop the Rx FIFO */ + + ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_); + if (ret < 0) + return ret; + + return 0; +} + +/* The caller must ensure the Rx path is stopped before calling + * lan78xx_flush_rx_fifo(). + */ +static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev) +{ + return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_); +} + static int lan78xx_reset(struct lan78xx_net *dev) { struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); @@ -2628,23 +2804,7 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (ret < 0) return ret; - ret = lan78xx_read_reg(dev, MAC_TX, &buf); - if (ret < 0) - return ret; - - buf |= MAC_TX_TXEN_; - - ret = lan78xx_write_reg(dev, MAC_TX, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, FCT_TX_CTL, &buf); - if (ret < 0) - return ret; - - buf |= FCT_TX_CTL_EN_; - - ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf); + ret = lan78xx_start_tx_path(dev); if (ret < 0) return ret; @@ -2653,27 +2813,9 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (ret < 0) return ret; - ret = lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; + ret = lan78xx_start_rx_path(dev); - buf |= MAC_RX_RXEN_; - - ret = lan78xx_write_reg(dev, MAC_RX, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, FCT_RX_CTL, &buf); - if (ret < 0) - return ret; - - buf |= FCT_RX_CTL_EN_; - - ret = lan78xx_write_reg(dev, FCT_RX_CTL, buf); - if (ret < 0) - return ret; - - return 0; + return ret; } static void lan78xx_init_stats(struct lan78xx_net *dev) @@ -3910,23 +4052,10 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) u16 crc; int ret; - ret = lan78xx_read_reg(dev, MAC_TX, &buf); + ret = lan78xx_stop_tx_path(dev); if (ret < 0) return ret; - - buf &= ~MAC_TX_TXEN_; - - ret = lan78xx_write_reg(dev, MAC_TX, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; - - buf &= ~MAC_RX_RXEN_; - - ret = lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_stop_rx_path(dev); if (ret < 0) return ret; @@ -4103,17 +4232,9 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) if (ret < 0) return ret; - lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; + ret = lan78xx_start_rx_path(dev); - buf |= MAC_RX_RXEN_; - - lan78xx_write_reg(dev, MAC_RX, buf); - if (ret < 0) - return ret; - - return 0; + return ret; } static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) @@ -4136,24 +4257,17 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) spin_unlock_irq(&dev->txq.lock); } - /* stop TX & RX */ - ret = lan78xx_read_reg(dev, MAC_TX, &buf); + /* stop RX */ + ret = lan78xx_stop_rx_path(dev); if (ret < 0) return ret; 
- buf &= ~MAC_TX_TXEN_; - - ret = lan78xx_write_reg(dev, MAC_TX, buf); + ret = lan78xx_flush_rx_fifo(dev); if (ret < 0) return ret; - ret = lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; - - buf &= ~MAC_RX_RXEN_; - - ret = lan78xx_write_reg(dev, MAC_RX, buf); + /* stop Tx */ + ret = lan78xx_stop_tx_path(dev); if (ret < 0) return ret; @@ -4171,23 +4285,11 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) if (PMSG_IS_AUTO(message)) { /* auto suspend (selective suspend) */ - ret = lan78xx_read_reg(dev, MAC_TX, &buf); + ret = lan78xx_stop_tx_path(dev); if (ret < 0) return ret; - buf &= ~MAC_TX_TXEN_; - - ret = lan78xx_write_reg(dev, MAC_TX, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; - - buf &= ~MAC_RX_RXEN_; - - ret = lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_stop_rx_path(dev); if (ret < 0) return ret; @@ -4239,13 +4341,7 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) if (ret < 0) return ret; - ret = lan78xx_read_reg(dev, MAC_RX, &buf); - if (ret < 0) - return ret; - - buf |= MAC_RX_RXEN_; - - ret = lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_start_rx_path(dev); if (ret < 0) return ret; } else { @@ -4270,7 +4366,6 @@ static int lan78xx_resume(struct usb_interface *intf) struct sk_buff *skb; struct urb *res; int ret; - u32 buf; if (!timer_pending(&dev->stat_monitor)) { dev->delta = 1; @@ -4278,6 +4373,10 @@ static int lan78xx_resume(struct usb_interface *intf) jiffies + STAT_UPDATE_TIMER); } + ret = lan78xx_flush_tx_fifo(dev); + if (ret < 0) + return ret; + if (!--dev->suspend_count) { /* resume interrupt URBs */ if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags)) { @@ -4337,17 +4436,9 @@ static int lan78xx_resume(struct usb_interface *intf) if (ret < 0) return ret; - ret = lan78xx_read_reg(dev, MAC_TX, &buf); - if (ret < 0) - return ret; + ret = lan78xx_start_tx_path(dev); - buf |= MAC_TX_TXEN_; - - ret = lan78xx_write_reg(dev, MAC_TX, buf); - if (ret < 0) - return ret; - - return 0; + return ret; } static int lan78xx_reset_resume(struct usb_interface *intf) From 11a3c9f489403de6050d9374687df75d8e78b904 Mon Sep 17 00:00:00 2001 From: John Efstathiades Date: Tue, 24 Aug 2021 19:56:11 +0100 Subject: [PATCH 06/71] lan78xx: Fix race conditions in suspend/resume handling [ Upstream commit 5f4cc6e25148cc141f97afb41b4dfe9eb1cce613 ] If the interface is given an IP address while the device is suspended (as a result of an auto-suspend event) there is a race between lan78xx_resume() and lan78xx_open() that can result in an exception or failure to handle incoming packets. The following changes fix this problem. Introduce a mutex to serialise operations in the network interface open and stop entry points with respect to the USB driver suspend and resume entry points. Move Tx and Rx data path start/stop to lan78xx_start() and lan78xx_stop() respectively and flush the packet FIFOs before starting the Tx and Rx data paths. This prevents the MAC and FIFOs getting out of step and delivery of malformed packets to the network stack. Stop processing of received packets before disconnecting the PHY from the MAC to prevent a kernel exception caused by handling packets after the PHY device has been removed. Refactor device auto-suspend code to make it consistent with the the system suspend code and make the suspend handler easier to read. 
Add new code to stop wake-on-lan packets or PHY events resuming the host or device from suspend if the device has not been opened (typically after an IP address is assigned). This patch is dependent on changes to lan78xx_suspend() and lan78xx_resume() introduced in the previous patch of this patch set. Signed-off-by: John Efstathiades Signed-off-by: David S. Miller Stable-dep-of: 1eecc7ab82c4 ("net: lan78xx: fix runtime PM count underflow on link stop") Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 433 +++++++++++++++++++++++++------------- 1 file changed, 291 insertions(+), 142 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 18269651e84c..c177a8676199 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -390,6 +390,7 @@ struct lan78xx_net { struct urb *urb_intr; struct usb_anchor deferred; + struct mutex dev_mutex; /* serialise open/stop wrt suspend/resume */ struct mutex phy_mutex; /* for phy access */ unsigned int pipe_in, pipe_out, pipe_intr; @@ -2287,11 +2288,16 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) int ll_mtu = new_mtu + netdev->hard_header_len; int old_hard_mtu = dev->hard_mtu; int old_rx_urb_size = dev->rx_urb_size; + int ret; /* no second zero-length packet read wanted after mtu-sized packets */ if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; + ret = usb_autopm_get_interface(dev->intf); + if (ret < 0) + return ret; + lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); netdev->mtu = new_mtu; @@ -2307,6 +2313,8 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) } } + usb_autopm_put_interface(dev->intf); + return 0; } @@ -2804,16 +2812,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) if (ret < 0) return ret; - ret = lan78xx_start_tx_path(dev); - if (ret < 0) - return ret; - ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + VLAN_ETH_HLEN); - if (ret < 0) - return ret; - - ret = lan78xx_start_rx_path(dev); return ret; } @@ -2849,10 +2849,14 @@ static int lan78xx_open(struct net_device *net) struct lan78xx_net *dev = netdev_priv(net); int ret; + netif_dbg(dev, ifup, dev->net, "open device"); + ret = usb_autopm_get_interface(dev->intf); if (ret < 0) return ret; + mutex_lock(&dev->dev_mutex); + phy_start(net->phydev); netif_dbg(dev, ifup, dev->net, "phy initialised successfully"); @@ -2867,6 +2871,20 @@ static int lan78xx_open(struct net_device *net) } } + ret = lan78xx_flush_rx_fifo(dev); + if (ret < 0) + goto done; + ret = lan78xx_flush_tx_fifo(dev); + if (ret < 0) + goto done; + + ret = lan78xx_start_tx_path(dev); + if (ret < 0) + goto done; + ret = lan78xx_start_rx_path(dev); + if (ret < 0) + goto done; + lan78xx_init_stats(dev); set_bit(EVENT_DEV_OPEN, &dev->flags); @@ -2877,6 +2895,8 @@ static int lan78xx_open(struct net_device *net) lan78xx_defer_kevent(dev, EVENT_LINK_RESET); done: + mutex_unlock(&dev->dev_mutex); + usb_autopm_put_interface(dev->intf); return ret; @@ -2895,38 +2915,56 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq); /* maybe wait for deletions to finish. 
*/ - while (!skb_queue_empty(&dev->rxq) && - !skb_queue_empty(&dev->txq) && - !skb_queue_empty(&dev->done)) { + while (!skb_queue_empty(&dev->rxq) || + !skb_queue_empty(&dev->txq)) { schedule_timeout(msecs_to_jiffies(UNLINK_TIMEOUT_MS)); set_current_state(TASK_UNINTERRUPTIBLE); netif_dbg(dev, ifdown, dev->net, - "waited for %d urb completions\n", temp); + "waited for %d urb completions", temp); } set_current_state(TASK_RUNNING); dev->wait = NULL; remove_wait_queue(&unlink_wakeup, &wait); + + while (!skb_queue_empty(&dev->done)) { + struct skb_data *entry; + struct sk_buff *skb; + + skb = skb_dequeue(&dev->done); + entry = (struct skb_data *)(skb->cb); + usb_free_urb(entry->urb); + dev_kfree_skb(skb); + } } static int lan78xx_stop(struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); + netif_dbg(dev, ifup, dev->net, "stop device"); + + mutex_lock(&dev->dev_mutex); + if (timer_pending(&dev->stat_monitor)) del_timer_sync(&dev->stat_monitor); - if (net->phydev) - phy_stop(net->phydev); - clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue(net); + tasklet_kill(&dev->bh); + + lan78xx_terminate_urbs(dev); netif_info(dev, ifdown, dev->net, "stop stats: rx/tx %lu/%lu, errs %lu/%lu\n", net->stats.rx_packets, net->stats.tx_packets, net->stats.rx_errors, net->stats.tx_errors); - lan78xx_terminate_urbs(dev); + /* ignore errors that occur stopping the Tx and Rx data paths */ + lan78xx_stop_tx_path(dev); + lan78xx_stop_rx_path(dev); + + if (net->phydev) + phy_stop(net->phydev); usb_kill_urb(dev->urb_intr); @@ -2936,12 +2974,17 @@ static int lan78xx_stop(struct net_device *net) * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. */ - dev->flags = 0; + clear_bit(EVENT_TX_HALT, &dev->flags); + clear_bit(EVENT_RX_HALT, &dev->flags); + clear_bit(EVENT_LINK_RESET, &dev->flags); + clear_bit(EVENT_STAT_UPDATE, &dev->flags); + cancel_delayed_work_sync(&dev->wq); - tasklet_kill(&dev->bh); usb_autopm_put_interface(dev->intf); + mutex_unlock(&dev->dev_mutex); + return 0; } @@ -3064,6 +3107,9 @@ lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) struct lan78xx_net *dev = netdev_priv(net); struct sk_buff *skb2 = NULL; + if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) + schedule_delayed_work(&dev->wq, 0); + if (skb) { skb_tx_timestamp(skb); skb2 = lan78xx_tx_prep(dev, skb, GFP_ATOMIC); @@ -3670,18 +3716,17 @@ static void lan78xx_delayedwork(struct work_struct *work) dev = container_of(work, struct lan78xx_net, wq.work); + if (usb_autopm_get_interface(dev->intf) < 0) + return; + if (test_bit(EVENT_TX_HALT, &dev->flags)) { unlink_urbs(dev, &dev->txq); - status = usb_autopm_get_interface(dev->intf); - if (status < 0) - goto fail_pipe; + status = usb_clear_halt(dev->udev, dev->pipe_out); - usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_tx_err(dev)) -fail_pipe: netdev_err(dev->net, "can't clear tx halt, status %d\n", status); @@ -3691,18 +3736,14 @@ static void lan78xx_delayedwork(struct work_struct *work) netif_wake_queue(dev->net); } } + if (test_bit(EVENT_RX_HALT, &dev->flags)) { unlink_urbs(dev, &dev->rxq); - status = usb_autopm_get_interface(dev->intf); - if (status < 0) - goto fail_halt; status = usb_clear_halt(dev->udev, dev->pipe_in); - usb_autopm_put_interface(dev->intf); if (status < 0 && status != -EPIPE && status != -ESHUTDOWN) { if (netif_msg_rx_err(dev)) -fail_halt: netdev_err(dev->net, "can't clear rx halt, status %d\n", status); @@ -3716,16 +3757,9 @@ 
static void lan78xx_delayedwork(struct work_struct *work) int ret = 0; clear_bit(EVENT_LINK_RESET, &dev->flags); - status = usb_autopm_get_interface(dev->intf); - if (status < 0) - goto skip_reset; if (lan78xx_link_reset(dev) < 0) { - usb_autopm_put_interface(dev->intf); -skip_reset: netdev_info(dev->net, "link reset failed (%d)\n", ret); - } else { - usb_autopm_put_interface(dev->intf); } } @@ -3739,6 +3773,8 @@ static void lan78xx_delayedwork(struct work_struct *work) dev->delta = min((dev->delta * 2), 50); } + + usb_autopm_put_interface(dev->intf); } static void intr_complete(struct urb *urb) @@ -3898,6 +3934,7 @@ static int lan78xx_probe(struct usb_interface *intf, skb_queue_head_init(&dev->rxq_pause); skb_queue_head_init(&dev->txq_pend); mutex_init(&dev->phy_mutex); + mutex_init(&dev->dev_mutex); tasklet_init(&dev->bh, lan78xx_bh, (unsigned long)dev); INIT_DELAYED_WORK(&dev->wq, lan78xx_delayedwork); @@ -4040,6 +4077,74 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len) return crc; } +static int lan78xx_set_auto_suspend(struct lan78xx_net *dev) +{ + u32 buf; + int ret; + + ret = lan78xx_stop_tx_path(dev); + if (ret < 0) + return ret; + + ret = lan78xx_stop_rx_path(dev); + if (ret < 0) + return ret; + + /* auto suspend (selective suspend) */ + + ret = lan78xx_write_reg(dev, WUCSR, 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WUCSR2, 0); + if (ret < 0) + return ret; + ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + if (ret < 0) + return ret; + + /* set goodframe wakeup */ + + ret = lan78xx_read_reg(dev, WUCSR, &buf); + if (ret < 0) + return ret; + + buf |= WUCSR_RFE_WAKE_EN_; + buf |= WUCSR_STORE_WAKE_; + + ret = lan78xx_write_reg(dev, WUCSR, buf); + if (ret < 0) + return ret; + + ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; + + buf &= ~PMT_CTL_RES_CLR_WKP_EN_; + buf |= PMT_CTL_RES_CLR_WKP_STS_; + buf |= PMT_CTL_PHY_WAKE_EN_; + buf |= PMT_CTL_WOL_EN_; + buf &= ~PMT_CTL_SUS_MODE_MASK_; + buf |= PMT_CTL_SUS_MODE_3_; + + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; + + ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + return ret; + + buf |= PMT_CTL_WUPS_MASK_; + + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + return ret; + + ret = lan78xx_start_rx_path(dev); + + return ret; +} + static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) { const u8 ipv4_multicast[3] = { 0x01, 0x00, 0x5E }; @@ -4240,15 +4345,22 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) { struct lan78xx_net *dev = usb_get_intfdata(intf); - u32 buf; + bool dev_open; int ret; - if (!dev->suspend_count++) { + mutex_lock(&dev->dev_mutex); + + netif_dbg(dev, ifdown, dev->net, + "suspending: pm event %#x", message.event); + + dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags); + + if (dev_open) { spin_lock_irq(&dev->txq.lock); /* don't autosuspend while transmitting */ if ((skb_queue_len(&dev->txq) || skb_queue_len(&dev->txq_pend)) && - PMSG_IS_AUTO(message)) { + PMSG_IS_AUTO(message)) { spin_unlock_irq(&dev->txq.lock); ret = -EBUSY; goto out; @@ -4260,171 +4372,204 @@ static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) /* stop RX */ ret = lan78xx_stop_rx_path(dev); if (ret < 0) - return ret; + goto out; ret = lan78xx_flush_rx_fifo(dev); if (ret < 0) - return ret; + goto out; /* stop Tx */ ret = lan78xx_stop_tx_path(dev); if (ret < 0) - return ret; + goto out; - /* empty out the rx and 
queues */ + /* empty out the Rx and Tx queues */ netif_device_detach(dev->net); lan78xx_terminate_urbs(dev); usb_kill_urb(dev->urb_intr); /* reattach */ netif_device_attach(dev->net); - } - if (test_bit(EVENT_DEV_ASLEEP, &dev->flags)) { del_timer(&dev->stat_monitor); if (PMSG_IS_AUTO(message)) { - /* auto suspend (selective suspend) */ - ret = lan78xx_stop_tx_path(dev); + ret = lan78xx_set_auto_suspend(dev); if (ret < 0) - return ret; - - ret = lan78xx_stop_rx_path(dev); - if (ret < 0) - return ret; - - ret = lan78xx_write_reg(dev, WUCSR, 0); - if (ret < 0) - return ret; - ret = lan78xx_write_reg(dev, WUCSR2, 0); - if (ret < 0) - return ret; - ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); - if (ret < 0) - return ret; - - /* set goodframe wakeup */ - ret = lan78xx_read_reg(dev, WUCSR, &buf); - if (ret < 0) - return ret; - - buf |= WUCSR_RFE_WAKE_EN_; - buf |= WUCSR_STORE_WAKE_; - - ret = lan78xx_write_reg(dev, WUCSR, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, PMT_CTL, &buf); - if (ret < 0) - return ret; - - buf &= ~PMT_CTL_RES_CLR_WKP_EN_; - buf |= PMT_CTL_RES_CLR_WKP_STS_; - - buf |= PMT_CTL_PHY_WAKE_EN_; - buf |= PMT_CTL_WOL_EN_; - buf &= ~PMT_CTL_SUS_MODE_MASK_; - buf |= PMT_CTL_SUS_MODE_3_; - - ret = lan78xx_write_reg(dev, PMT_CTL, buf); - if (ret < 0) - return ret; - - ret = lan78xx_read_reg(dev, PMT_CTL, &buf); - if (ret < 0) - return ret; - - buf |= PMT_CTL_WUPS_MASK_; - - ret = lan78xx_write_reg(dev, PMT_CTL, buf); - if (ret < 0) - return ret; - - ret = lan78xx_start_rx_path(dev); - if (ret < 0) - return ret; + goto out; } else { struct lan78xx_priv *pdata; pdata = (struct lan78xx_priv *)(dev->data[0]); - + netif_carrier_off(dev->net); ret = lan78xx_set_suspend(dev, pdata->wol); if (ret < 0) - return ret; + goto out; } + } else { + /* Interface is down; don't allow WOL and PHY + * events to wake up the host + */ + u32 buf; + + set_bit(EVENT_DEV_ASLEEP, &dev->flags); + + ret = lan78xx_write_reg(dev, WUCSR, 0); + if (ret < 0) + goto out; + ret = lan78xx_write_reg(dev, WUCSR2, 0); + if (ret < 0) + goto out; + + ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + goto out; + + buf &= ~PMT_CTL_RES_CLR_WKP_EN_; + buf |= PMT_CTL_RES_CLR_WKP_STS_; + buf &= ~PMT_CTL_SUS_MODE_MASK_; + buf |= PMT_CTL_SUS_MODE_3_; + + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + goto out; + + ret = lan78xx_read_reg(dev, PMT_CTL, &buf); + if (ret < 0) + goto out; + + buf |= PMT_CTL_WUPS_MASK_; + + ret = lan78xx_write_reg(dev, PMT_CTL, buf); + if (ret < 0) + goto out; } ret = 0; out: + mutex_unlock(&dev->dev_mutex); + return ret; } +static bool lan78xx_submit_deferred_urbs(struct lan78xx_net *dev) +{ + bool pipe_halted = false; + struct urb *urb; + + while ((urb = usb_get_from_anchor(&dev->deferred))) { + struct sk_buff *skb = urb->context; + int ret; + + if (!netif_device_present(dev->net) || + !netif_carrier_ok(dev->net) || + pipe_halted) { + usb_free_urb(urb); + dev_kfree_skb(skb); + continue; + } + + ret = usb_submit_urb(urb, GFP_ATOMIC); + + if (ret == 0) { + netif_trans_update(dev->net); + lan78xx_queue_skb(&dev->txq, skb, tx_start); + } else { + usb_free_urb(urb); + dev_kfree_skb(skb); + + if (ret == -EPIPE) { + netif_stop_queue(dev->net); + pipe_halted = true; + } else if (ret == -ENODEV) { + netif_device_detach(dev->net); + } + } + } + + return pipe_halted; +} + static int lan78xx_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); - struct sk_buff *skb; - struct urb *res; + bool dev_open; int ret; - if 
(!timer_pending(&dev->stat_monitor)) { - dev->delta = 1; - mod_timer(&dev->stat_monitor, - jiffies + STAT_UPDATE_TIMER); - } + mutex_lock(&dev->dev_mutex); - ret = lan78xx_flush_tx_fifo(dev); - if (ret < 0) - return ret; + netif_dbg(dev, ifup, dev->net, "resuming device"); - if (!--dev->suspend_count) { - /* resume interrupt URBs */ - if (dev->urb_intr && test_bit(EVENT_DEV_OPEN, &dev->flags)) { - ret = usb_submit_urb(dev->urb_intr, GFP_NOIO); - if (ret < 0) - return ret; - } + dev_open = test_bit(EVENT_DEV_OPEN, &dev->flags); + + if (dev_open) { + bool pipe_halted = false; + + ret = lan78xx_flush_tx_fifo(dev); + if (ret < 0) + goto out; + + if (dev->urb_intr) { + int ret = usb_submit_urb(dev->urb_intr, GFP_KERNEL); - spin_lock_irq(&dev->txq.lock); - while ((res = usb_get_from_anchor(&dev->deferred))) { - skb = (struct sk_buff *)res->context; - ret = usb_submit_urb(res, GFP_ATOMIC); if (ret < 0) { - dev_kfree_skb_any(skb); - usb_free_urb(res); - usb_autopm_put_interface_async(dev->intf); - } else { - netif_trans_update(dev->net); - lan78xx_queue_skb(&dev->txq, skb, tx_start); + if (ret == -ENODEV) + netif_device_detach(dev->net); + + netdev_warn(dev->net, "Failed to submit intr URB"); } } + spin_lock_irq(&dev->txq.lock); + + if (netif_device_present(dev->net)) { + pipe_halted = lan78xx_submit_deferred_urbs(dev); + + if (pipe_halted) + lan78xx_defer_kevent(dev, EVENT_TX_HALT); + } + clear_bit(EVENT_DEV_ASLEEP, &dev->flags); + spin_unlock_irq(&dev->txq.lock); - if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { - if (!(skb_queue_len(&dev->txq) >= dev->tx_qlen)) - netif_start_queue(dev->net); - tasklet_schedule(&dev->bh); + if (!pipe_halted && + netif_device_present(dev->net) && + (skb_queue_len(&dev->txq) < dev->tx_qlen)) + netif_start_queue(dev->net); + + ret = lan78xx_start_tx_path(dev); + if (ret < 0) + goto out; + + tasklet_schedule(&dev->bh); + + if (!timer_pending(&dev->stat_monitor)) { + dev->delta = 1; + mod_timer(&dev->stat_monitor, + jiffies + STAT_UPDATE_TIMER); } + + } else { + clear_bit(EVENT_DEV_ASLEEP, &dev->flags); } ret = lan78xx_write_reg(dev, WUCSR2, 0); if (ret < 0) - return ret; + goto out; ret = lan78xx_write_reg(dev, WUCSR, 0); if (ret < 0) - return ret; + goto out; ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); if (ret < 0) - return ret; + goto out; ret = lan78xx_write_reg(dev, WUCSR2, WUCSR2_NS_RCD_ | WUCSR2_ARP_RCD_ | WUCSR2_IPV6_TCPSYN_RCD_ | WUCSR2_IPV4_TCPSYN_RCD_); if (ret < 0) - return ret; + goto out; ret = lan78xx_write_reg(dev, WUCSR, WUCSR_EEE_TX_WAKE_ | WUCSR_EEE_RX_WAKE_ | @@ -4434,9 +4579,11 @@ static int lan78xx_resume(struct usb_interface *intf) WUCSR_MPR_ | WUCSR_BCST_FR_); if (ret < 0) - return ret; + goto out; - ret = lan78xx_start_tx_path(dev); + ret = 0; +out: + mutex_unlock(&dev->dev_mutex); return ret; } @@ -4446,6 +4593,8 @@ static int lan78xx_reset_resume(struct usb_interface *intf) struct lan78xx_net *dev = usb_get_intfdata(intf); int ret; + netif_dbg(dev, ifup, dev->net, "(reset) resuming device"); + ret = lan78xx_reset(dev); if (ret < 0) return ret; From 336261af041592fd5ef24022f132de96030ea049 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 28 Feb 2024 13:45:17 +0100 Subject: [PATCH 07/71] net: lan78xx: fix runtime PM count underflow on link stop [ Upstream commit 1eecc7ab82c42133b748e1895275942a054a7f67 ] Current driver has some asymmetry in the runtime PM calls. On lan78xx_open() it will call usb_autopm_get() and unconditionally usb_autopm_put(). And on lan78xx_stop() it will call only usb_autopm_put(). 
So far, it was working only because this driver do not activate autosuspend by default, so it was visible only by warning "Runtime PM usage count underflow!". Since, with current driver, we can't use runtime PM with active link, execute lan78xx_open()->usb_autopm_put() only in error case. Otherwise, keep ref counting high as long as interface is open. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Oleksij Rempel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index c177a8676199..96d3d0bd248b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2897,7 +2897,8 @@ static int lan78xx_open(struct net_device *net) done: mutex_unlock(&dev->dev_mutex); - usb_autopm_put_interface(dev->intf); + if (ret < 0) + usb_autopm_put_interface(dev->intf); return ret; } From ad91d5d1b65a5ef7eff6b5175561c8169e8886e9 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:51 +0100 Subject: [PATCH 08/71] ixgbe: {dis, en}able irqs in ixgbe_txrx_ring_{dis, en}able [ Upstream commit cbf996f52c4e658b3fb4349a869a62fd2d4c3c1c ] Currently routines that are supposed to toggle state of ring pair do not take care of associated interrupt with queue vector that these rings belong to. This causes funky issues such as dead interface due to irq misconfiguration, as per Pavel's report from Closes: tag. Add a function responsible for disabling single IRQ in EIMC register and call this as a very first thing when disabling ring pair during xsk_pool setup. For enable let's reuse ixgbe_irq_enable_queues(). Besides this, disable/enable NAPI as first/last thing when dealing with closing or opening ring pair that xsk_pool is being configured on. 
Reported-by: Pavel Vazharov Closes: https://lore.kernel.org/netdev/CAJEV1ijxNyPTwASJER1bcZzS9nMoZJqfR86nu_3jFFVXzZQ4NA@mail.gmail.com/ Fixes: 024aa5800f32 ("ixgbe: added Rx/Tx ring disable/enable functions") Signed-off-by: Maciej Fijalkowski Acked-by: Magnus Karlsson Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b16cb2365d96..b7672200dc62 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2949,8 +2949,8 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { - u32 mask; struct ixgbe_hw *hw = &adapter->hw; + u32 mask; switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -10394,6 +10394,44 @@ static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring) memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats)); } +/** + * ixgbe_irq_disable_single - Disable single IRQ vector + * @adapter: adapter structure + * @ring: ring index + **/ +static void ixgbe_irq_disable_single(struct ixgbe_adapter *adapter, u32 ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u64 qmask = BIT_ULL(ring); + u32 mask; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + mask = qmask & IXGBE_EIMC_RTX_QUEUE; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_x550em_a: + mask = (qmask & 0xFFFFFFFF); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); + mask = (qmask >> 32); + if (mask) + IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); + break; + default: + break; + } + IXGBE_WRITE_FLUSH(&adapter->hw); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + synchronize_irq(adapter->msix_entries[ring].vector); + else + synchronize_irq(adapter->pdev->irq); +} + /** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure @@ -10410,6 +10448,11 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; + ixgbe_irq_disable_single(adapter, ring); + + /* Rx/Tx/XDP Tx share the same napi context. */ + napi_disable(&rx_ring->q_vector->napi); + ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring) ixgbe_disable_txr(adapter, xdp_ring); @@ -10418,9 +10461,6 @@ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring) if (xdp_ring) synchronize_rcu(); - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_disable(&rx_ring->q_vector->napi); - ixgbe_clean_tx_ring(tx_ring); if (xdp_ring) ixgbe_clean_tx_ring(xdp_ring); @@ -10448,9 +10488,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) tx_ring = adapter->tx_ring[ring]; xdp_ring = adapter->xdp_ring[ring]; - /* Rx/Tx/XDP Tx share the same napi context. */ - napi_enable(&rx_ring->q_vector->napi); - ixgbe_configure_tx_ring(adapter, tx_ring); if (xdp_ring) ixgbe_configure_tx_ring(adapter, xdp_ring); @@ -10459,6 +10496,11 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring) clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state); if (xdp_ring) clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state); + + /* Rx/Tx/XDP Tx share the same napi context. 
*/ + napi_enable(&rx_ring->q_vector->napi); + ixgbe_irq_enable_queues(adapter, BIT_ULL(ring)); + IXGBE_WRITE_FLUSH(&adapter->hw); } /** From 71e21eb1f888cb0d0f3de3f579c0db413e6e3810 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Feb 2024 22:45:52 +0100 Subject: [PATCH 09/71] i40e: disable NAPI right after disabling irqs when handling xsk_pool [ Upstream commit d562b11c1eac7d73f4c778b4cbe5468f86b1f20d ] Disable NAPI before shutting down queues that this particular NAPI contains so that the order of actions in i40e_queue_pair_disable() mirrors what we do in i40e_queue_pair_enable(). Fixes: 123cecd427b6 ("i40e: added queue pair disable/enable functions") Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Magnus Karlsson Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 135acd74497f..58c87d79c126 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12944,9 +12944,9 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair) return err; i40e_queue_pair_disable_irq(vsi, queue_pair); + i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */); i40e_clean_rx_ring(vsi->rx_rings[queue_pair]); - i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */); i40e_queue_pair_clean_rings(vsi, queue_pair); i40e_queue_pair_reset_stats(vsi, queue_pair); From fdb63c179f7a26e0fbc3fd8f57f655f7f45fe984 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 29 Feb 2024 14:34:44 -0500 Subject: [PATCH 10/71] tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 51270d573a8d9dd5afdc7934de97d66c0e14b5fd ] I'm updating __assign_str() and will be removing the second parameter. To make sure that it does not break anything, I make sure that it matches the __string() field, as that is where the string is actually going to be saved in. To make sure there's nothing that breaks, I added a WARN_ON() to make sure that what was used in __string() is the same that is used in __assign_str(). In doing this change, an error was triggered as __assign_str() now expects the string passed in to be a char * value. I instead had the following warning: include/trace/events/qdisc.h: In function ‘trace_event_raw_event_qdisc_reset’: include/trace/events/qdisc.h:91:35: error: passing argument 1 of 'strcmp' from incompatible pointer type [-Werror=incompatible-pointer-types] 91 | __assign_str(dev, qdisc_dev(q)); That's because the qdisc_enqueue() and qdisc_reset() pass in qdisc_dev(q) to __assign_str() and to __string(). But that function returns a pointer to struct net_device and not a string. It appears that these events are just saving the pointer as a string and then reading it as a string as well. Use qdisc_dev(q)->name to save the device instead. Fixes: a34dac0b90552 ("net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()") Signed-off-by: Steven Rostedt (Google) Reviewed-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- include/trace/events/qdisc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index 330d32d84485..a50df41634c5 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -53,14 +53,14 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; @@ -78,14 +78,14 @@ TRACE_EVENT(qdisc_destroy, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q) ) - __string( kind, q->ops->id ) - __field( u32, parent ) - __field( u32, handle ) + __string( dev, qdisc_dev(q)->name ) + __string( kind, q->ops->id ) + __field( u32, parent ) + __field( u32, handle ) ), TP_fast_assign( - __assign_str(dev, qdisc_dev(q)); + __assign_str(dev, qdisc_dev(q)->name); __assign_str(kind, q->ops->id); __entry->parent = q->parent; __entry->handle = q->handle; From c7137900691f5692fe3de54566ea7b30bb35d66c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Feb 2024 13:11:52 +0000 Subject: [PATCH 11/71] geneve: make sure to pull inner header in geneve_rx() [ Upstream commit 1ca1ba465e55b9460e4e75dec9fff31e708fec74 ] syzbot triggered a bug in geneve_rx() [1] Issue is similar to the one I fixed in commit 8d975c15c0cd ("ip6_tunnel: make sure to pull inner header in __ip6_tnl_rcv()") We have to save skb->network_header in a temporary variable in order to be able to recompute the network_header pointer after a pskb_inet_may_pull() call. pskb_inet_may_pull() makes sure the needed headers are in skb->head. 
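For reviewers unfamiliar with the pattern: any pointer computed into skb->head may dangle once a pull helper reallocates the head, so the safe idiom is to remember an offset and rebuild the pointer afterwards. A standalone userspace sketch of the same idea, with hypothetical names and plain malloc()/realloc() standing in for the skb machinery:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
        const char *pkt = "outer-hdr|inner-hdr";
        char *head = malloc(strlen(pkt) + 1);
        size_t hdr_off;

        if (!head)
                return 1;
        strcpy(head, pkt);

        /* Remember where the header of interest starts as an offset... */
        hdr_off = strlen("outer-hdr|");

        /* ...because a "pull" that grows the buffer (as
         * pskb_inet_may_pull() may do with skb->head) can move the data
         * and invalidate any saved pointer.
         */
        head = realloc(head, 4096);
        if (!head)
                return 1;

        /* Rebuild the pointer from the saved offset after the move. */
        printf("header: %s\n", head + hdr_off);

        free(head);
        return 0;
}

The patch applies exactly this: it saves skb_network_header(skb) - skb->head before the pull and recomputes oiph as skb->head + nh afterwards.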
[1] BUG: KMSAN: uninit-value in IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] BUG: KMSAN: uninit-value in geneve_rx drivers/net/geneve.c:279 [inline] BUG: KMSAN: uninit-value in geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 IP_ECN_decapsulate include/net/inet_ecn.h:302 [inline] geneve_rx drivers/net/geneve.c:279 [inline] geneve_udp_encap_recv+0x36f9/0x3c10 drivers/net/geneve.c:391 udp_queue_rcv_one_skb+0x1d39/0x1f20 net/ipv4/udp.c:2108 udp_queue_rcv_skb+0x6ae/0x6e0 net/ipv4/udp.c:2186 udp_unicast_rcv_skb+0x184/0x4b0 net/ipv4/udp.c:2346 __udp4_lib_rcv+0x1c6b/0x3010 net/ipv4/udp.c:2422 udp_rcv+0x7d/0xa0 net/ipv4/udp.c:2604 ip_protocol_deliver_rcu+0x264/0x1300 net/ipv4/ip_input.c:205 ip_local_deliver_finish+0x2b8/0x440 net/ipv4/ip_input.c:233 NF_HOOK include/linux/netfilter.h:314 [inline] ip_local_deliver+0x21f/0x490 net/ipv4/ip_input.c:254 dst_input include/net/dst.h:461 [inline] ip_rcv_finish net/ipv4/ip_input.c:449 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] ip_rcv+0x46f/0x760 net/ipv4/ip_input.c:569 __netif_receive_skb_one_core net/core/dev.c:5534 [inline] __netif_receive_skb+0x1a6/0x5a0 net/core/dev.c:5648 process_backlog+0x480/0x8b0 net/core/dev.c:5976 __napi_poll+0xe3/0x980 net/core/dev.c:6576 napi_poll net/core/dev.c:6645 [inline] net_rx_action+0x8b8/0x1870 net/core/dev.c:6778 __do_softirq+0x1b7/0x7c5 kernel/softirq.c:553 do_softirq+0x9a/0xf0 kernel/softirq.c:454 __local_bh_enable_ip+0x9b/0xa0 kernel/softirq.c:381 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:820 [inline] __dev_queue_xmit+0x2768/0x51c0 net/core/dev.c:4378 dev_queue_xmit include/linux/netdevice.h:3171 [inline] packet_xmit+0x9c/0x6b0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3081 [inline] packet_sendmsg+0x8aef/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: slab_post_alloc_hook mm/slub.c:3819 [inline] slab_alloc_node mm/slub.c:3860 [inline] kmem_cache_alloc_node+0x5cb/0xbc0 mm/slub.c:3903 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:560 __alloc_skb+0x352/0x790 net/core/skbuff.c:651 alloc_skb include/linux/skbuff.h:1296 [inline] alloc_skb_with_frags+0xc8/0xbd0 net/core/skbuff.c:6394 sock_alloc_send_pskb+0xa80/0xbf0 net/core/sock.c:2783 packet_alloc_skb net/packet/af_packet.c:2930 [inline] packet_snd net/packet/af_packet.c:3024 [inline] packet_sendmsg+0x70c2/0x9f10 net/packet/af_packet.c:3113 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] __sys_sendto+0x735/0xa10 net/socket.c:2191 __do_sys_sendto net/socket.c:2203 [inline] __se_sys_sendto net/socket.c:2199 [inline] __x64_sys_sendto+0x125/0x1c0 net/socket.c:2199 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels") Reported-and-tested-by: syzbot+6a1423ff3f97159aae64@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/geneve.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 081939cb420b..2bb9820c6664 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -218,7 +218,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; - int err = 0; + int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { @@ -262,9 +262,23 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, goto drop; } - oiph = skb_network_header(skb); + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(geneve->dev, rx_length_errors); + DEV_STATS_INC(geneve->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + oiph = skb->head + nh; + if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) From 37fe99016b12d32100ce670216816dba6c48b309 Mon Sep 17 00:00:00 2001 From: Rand Deeb Date: Wed, 28 Feb 2024 18:54:48 +0300 Subject: [PATCH 12/71] net: ice: Fix potential NULL pointer dereference in ice_bridge_setlink() [ Upstream commit 06e456a05d669ca30b224b8ed962421770c1496c ] The function ice_bridge_setlink() may encounter a NULL pointer dereference if nlmsg_find_attr() returns NULL and br_spec is dereferenced subsequently in nla_for_each_nested(). To address this issue, add a check to ensure that br_spec is not NULL before proceeding with the nested attribute iteration. Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink") Signed-off-by: Rand Deeb Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 02f72fbec104..035bc90c8124 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6546,6 +6546,8 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, pf_sw = pf->first_sw; /* find the attribute in the netlink message */ br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; From 79ce2e54cc0ae366f45516c00bf1b19aa43e9abe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Mar 2024 14:48:00 +0000 Subject: [PATCH 13/71] net/ipv6: avoid possible UAF in ip6_route_mpath_notify() [ Upstream commit 685f7d531264599b3f167f1e94bbd22f120e5fab ] syzbot found another use-after-free in ip6_route_mpath_notify() [1] Commit f7225172f25a ("net/ipv6: prevent use after free in ip6_route_mpath_notify") was not able to fix the root cause. We need to defer the fib6_info_release() calls after ip6_route_mpath_notify(), in the cleanup phase. 
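The shape of the fix is a common ownership rule: every object that a later notification will dereference must keep its reference until the cleanup phase, and the cleanup loop is the single place that drops them. A minimal standalone sketch of that ordering (hypothetical types, not the fib6 code):

#include <stdio.h>
#include <stdlib.h>

struct route {
        int id;
        struct route *next;
};

static void notify(const struct route *first, const struct route *last)
{
        /* Would be a use-after-free if the routes were already released. */
        printf("notify: first=%d last=%d\n", first->id, last->id);
}

int main(void)
{
        struct route *head = NULL, *r, *next;
        struct route *first = NULL, *last = NULL;
        int i;

        for (i = 0; i < 3; i++) {
                r = malloc(sizeof(*r));
                if (!r)
                        return 1;
                r->id = i;
                r->next = head;
                head = r;
                if (!first)
                        first = r;
                last = r;
        }

        notify(first, last);            /* references still valid here */

        for (r = head; r; r = next) {   /* release only in the cleanup phase */
                next = r->next;
                free(r);
        }
        return 0;
}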
[1] BUG: KASAN: slab-use-after-free in rt6_fill_node+0x1460/0x1ac0 Read of size 4 at addr ffff88809a07fc64 by task syz-executor.2/23037 CPU: 0 PID: 23037 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-01035-gea7f3cfaa588 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:377 [inline] print_report+0x167/0x540 mm/kasan/report.c:488 kasan_report+0x142/0x180 mm/kasan/report.c:601 rt6_fill_node+0x1460/0x1ac0 inet6_rt_notify+0x13b/0x290 net/ipv6/route.c:6184 ip6_route_mpath_notify net/ipv6/route.c:5198 [inline] ip6_route_multipath_add net/ipv6/route.c:5404 [inline] inet6_rtm_newroute+0x1d0f/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7f73dd87dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f73de6550c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f73dd9ac050 RCX: 00007f73dd87dda9 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f73dd8ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000006e R14: 00007f73dd9ac050 R15: 00007ffdbdeb7858 Allocated by task 23037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:372 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:389 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slub.c:3981 [inline] __kmalloc+0x22e/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] fib6_info_alloc+0x2e/0xf0 net/ipv6/ip6_fib.c:155 ip6_route_info_create+0x445/0x12b0 net/ipv6/route.c:3758 ip6_route_multipath_add net/ipv6/route.c:5298 [inline] inet6_rtm_newroute+0x744/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 Freed by task 16: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x4e/0x60 mm/kasan/generic.c:640 poison_slab_object+0xa6/0xe0 mm/kasan/common.c:241 __kasan_slab_free+0x34/0x70 mm/kasan/common.c:257 kasan_slab_free 
include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2121 [inline] slab_free mm/slub.c:4299 [inline] kfree+0x14a/0x380 mm/slub.c:4409 rcu_do_batch kernel/rcu/tree.c:2190 [inline] rcu_core+0xd76/0x1810 kernel/rcu/tree.c:2465 __do_softirq+0x2bb/0x942 kernel/softirq.c:553 Last potentially related work creation: kasan_save_stack+0x3f/0x60 mm/kasan/common.c:47 __kasan_record_aux_stack+0xae/0x100 mm/kasan/generic.c:586 __call_rcu_common kernel/rcu/tree.c:2715 [inline] call_rcu+0x167/0xa80 kernel/rcu/tree.c:2829 fib6_info_release include/net/ip6_fib.h:341 [inline] ip6_route_multipath_add net/ipv6/route.c:5344 [inline] inet6_rtm_newroute+0x114d/0x2300 net/ipv6/route.c:5517 rtnetlink_rcv_msg+0x885/0x1040 net/core/rtnetlink.c:6597 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 The buggy address belongs to the object at ffff88809a07fc00 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 100 bytes inside of freed 512-byte region [ffff88809a07fc00, ffff88809a07fe00) The buggy address belongs to the physical page: page:ffffea0002681f00 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x9a07c head:ffffea0002681f00 order:2 entire_mapcount:0 nr_pages_mapped:0 pincount:0 flags: 0xfff00000000840(slab|head|node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000840 ffff888014c41c80 dead000000000122 0000000000000000 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 2, migratetype Unmovable, gfp_mask 0x1d20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL), pid 23028, tgid 23027 (syz-executor.4), ts 2340253595219, free_ts 2339107097036 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x1ea/0x210 mm/page_alloc.c:1533 prep_new_page mm/page_alloc.c:1540 [inline] get_page_from_freelist+0x33ea/0x3580 mm/page_alloc.c:3311 __alloc_pages+0x255/0x680 mm/page_alloc.c:4567 __alloc_pages_node include/linux/gfp.h:238 [inline] alloc_pages_node include/linux/gfp.h:261 [inline] alloc_slab_page+0x5f/0x160 mm/slub.c:2190 allocate_slab mm/slub.c:2354 [inline] new_slab+0x84/0x2f0 mm/slub.c:2407 ___slab_alloc+0xd17/0x13e0 mm/slub.c:3540 __slab_alloc mm/slub.c:3625 [inline] __slab_alloc_node mm/slub.c:3678 [inline] slab_alloc_node mm/slub.c:3850 [inline] __do_kmalloc_node mm/slub.c:3980 [inline] __kmalloc+0x2e0/0x490 mm/slub.c:3994 kmalloc include/linux/slab.h:594 [inline] kzalloc include/linux/slab.h:711 [inline] new_dir fs/proc/proc_sysctl.c:956 [inline] get_subdir fs/proc/proc_sysctl.c:1000 [inline] sysctl_mkdir_p fs/proc/proc_sysctl.c:1295 [inline] __register_sysctl_table+0xb30/0x1440 fs/proc/proc_sysctl.c:1376 neigh_sysctl_register+0x416/0x500 net/core/neighbour.c:3859 devinet_sysctl_register+0xaf/0x1f0 net/ipv4/devinet.c:2644 inetdev_init+0x296/0x4d0 net/ipv4/devinet.c:286 inetdev_event+0x338/0x15c0 net/ipv4/devinet.c:1555 notifier_call_chain+0x18f/0x3b0 kernel/notifier.c:93 
call_netdevice_notifiers_extack net/core/dev.c:1987 [inline] call_netdevice_notifiers net/core/dev.c:2001 [inline] register_netdevice+0x15b2/0x1a20 net/core/dev.c:10340 br_dev_newlink+0x27/0x100 net/bridge/br_netlink.c:1563 rtnl_newlink_create net/core/rtnetlink.c:3497 [inline] __rtnl_newlink net/core/rtnetlink.c:3717 [inline] rtnl_newlink+0x158f/0x20a0 net/core/rtnetlink.c:3730 page last free pid 11583 tgid 11583 stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1140 [inline] free_unref_page_prepare+0x968/0xa90 mm/page_alloc.c:2346 free_unref_page+0x37/0x3f0 mm/page_alloc.c:2486 kasan_depopulate_vmalloc_pte+0x74/0x90 mm/kasan/shadow.c:415 apply_to_pte_range mm/memory.c:2619 [inline] apply_to_pmd_range mm/memory.c:2663 [inline] apply_to_pud_range mm/memory.c:2699 [inline] apply_to_p4d_range mm/memory.c:2735 [inline] __apply_to_page_range+0x8ec/0xe40 mm/memory.c:2769 kasan_release_vmalloc+0x9a/0xb0 mm/kasan/shadow.c:532 __purge_vmap_area_lazy+0x163f/0x1a10 mm/vmalloc.c:1770 drain_vmap_area_work+0x40/0xd0 mm/vmalloc.c:1804 process_one_work kernel/workqueue.c:2633 [inline] process_scheduled_works+0x913/0x1420 kernel/workqueue.c:2706 worker_thread+0xa5f/0x1000 kernel/workqueue.c:2787 kthread+0x2ef/0x390 kernel/kthread.c:388 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 Memory state around the buggy address: ffff88809a07fb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88809a07fb80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88809a07fc00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88809a07fc80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88809a07fd00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240303144801.702646-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv6/route.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b23e42efb3df..2d53c362f309 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5235,19 +5235,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); - fib6_info_release(nh->fib6_info); - if (!err) { - /* save reference to last route successfully inserted */ - rt_last = nh->fib6_info; - - /* save reference to first route for notification */ - if (!rt_notif) - rt_notif = nh->fib6_info; - } - - /* nh->fib6_info is used or freed at this point, reset to NULL*/ - nh->fib6_info = NULL; if (err) { if (replace && nhn) NL_SET_ERR_MSG_MOD(extack, @@ -5255,6 +5243,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = nh; goto add_errout; } + /* save reference to last route successfully inserted */ + rt_last = nh->fib6_info; + + /* save reference to first route for notification */ + if (!rt_notif) + rt_notif = nh->fib6_info; /* Because each route is added like a single route we remove * these flags after the first nexthop: if there is a collision, @@ -5315,8 +5309,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->fib6_info) - fib6_info_release(nh->fib6_info); + 
fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } From 5f4e51abfbe6eb444fa91906a5cd083044278297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 5 Mar 2024 22:31:32 +0100 Subject: [PATCH 14/71] cpumap: Zero-initialise xdp_rxq_info struct before running XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2487007aa3b9fafbd2cb14068f49791ce1d7ede5 ] When running an XDP program that is attached to a cpumap entry, we don't initialise the xdp_rxq_info data structure being used in the xdp_buff that backs the XDP program invocation. Tobias noticed that this leads to random values being returned as the xdp_md->rx_queue_index value for XDP programs running in a cpumap. This means we're basically returning the contents of the uninitialised memory, which is bad. Fix this by zero-initialising the rxq data structure before running the XDP program. Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap") Reported-by: Tobias Böhm Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240305213132.11955-1-toke@redhat.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index c61a23b564aa..2dcc04b2f330 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -229,7 +229,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, void **frames, int n, struct xdp_cpumap_stats *stats) { - struct xdp_rxq_info rxq; + struct xdp_rxq_info rxq = {}; struct xdp_buff xdp; int i, nframes = 0; From 9dfc15a10dfd44f8ff7f27488651cb5be6af83c2 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 5 Mar 2024 08:13:08 +0800 Subject: [PATCH 15/71] net/rds: fix WARNING in rds_conn_connect_if_down [ Upstream commit c055fc00c07be1f0df7375ab0036cebd1106ed38 ] If connection isn't established yet, get_mr() will fail, trigger connection after get_mr(). Fixes: 584a8279a44a ("RDS: RDMA: return appropriate error on rdma map failures") Reported-and-tested-by: syzbot+d4faee732755bba9838e@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/rds/rdma.c | 3 +++ net/rds/send.c | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 6f1a50d50d06..c29c7a59f205 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, kfree(sg); } ret = PTR_ERR(trans_private); + /* Trigger connection so that its ready for the next retry */ + if (ret == -ENODEV) + rds_conn_connect_if_down(cp->cp_conn); goto out; } diff --git a/net/rds/send.c b/net/rds/send.c index 985d0b7713ac..65eeb82cb5de 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1314,12 +1314,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. 
*/ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct); - if (ret) { - /* Trigger connection so that its ready for the next retry */ - if (ret == -EAGAIN) - rds_conn_connect_if_down(conn); + if (ret) goto out; - } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", From 2b4e7cb7d523448fc24fdb863ff75c8008979637 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Mar 2024 13:38:15 +0100 Subject: [PATCH 16/71] netfilter: nft_ct: fix l3num expectations with inet pseudo family [ Upstream commit 99993789966a6eb4f1295193dc543686899892d3 ] Following is rejected but should be allowed: table inet t { ct expectation exp1 { [..] l3proto ip Valid combos are: table ip t, l3proto ip table ip6 t, l3proto ip6 table inet t, l3proto ip OR l3proto ip6 Disallow inet pseudeo family, the l3num must be a on-wire protocol known to conntrack. Retain NFPROTO_INET case to make it clear its rejected intentionally rather as oversight. Fixes: 8059918a1377 ("netfilter: nft_ct: sanitize layer 3 and 4 protocol number in custom expectations") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nft_ct.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 2b15dbbca98b..2a8dfa68f6e2 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1188,14 +1188,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l3num) { case NFPROTO_IPV4: case NFPROTO_IPV6: - if (priv->l3num != ctx->family) - return -EINVAL; + if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET) + break; - fallthrough; - case NFPROTO_INET: - break; + return -EINVAL; + case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */ default: - return -EOPNOTSUPP; + return -EAFNOSUPPORT; } priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); From ccd1108b16ab572d9bf635586b0925635dbd6bbc Mon Sep 17 00:00:00 2001 From: Lena Wang Date: Tue, 5 Mar 2024 11:38:55 +0000 Subject: [PATCH 17/71] netfilter: nf_conntrack_h323: Add protection for bmp length out of range [ Upstream commit 767146637efc528b5e3d31297df115e85a2fd362 ] UBSAN load reports an exception of BRK#5515 SHIFT_ISSUE:Bitwise shifts that are out of bounds for their data type. vmlinux get_bitmap(b=75) + 712 vmlinux decode_seq(bs=0xFFFFFFD008037000, f=0xFFFFFFD008037018, level=134443100) + 1956 vmlinux decode_choice(base=0xFFFFFFD0080370F0, level=23843636) + 1216 vmlinux decode_seq(f=0xFFFFFFD0080371A8, level=134443500) + 812 vmlinux decode_choice(base=0xFFFFFFD008037280, level=0) + 1216 vmlinux DecodeRasMessage() + 304 vmlinux ras_help() + 684 vmlinux nf_confirm() + 188 Due to abnormal data in skb->data, the extension bitmap length exceeds 32 when decoding ras message then uses the length to make a shift operation. It will change into negative after several loop. UBSAN load could detect a negative shift as an undefined behaviour and reports exception. So we add the protection to avoid the length exceeding 32. Or else it will return out of range error and stop decoding. 
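The underlying C rule is that shifting a 32-bit value by 32 or more bits is undefined behaviour, so a decoder must reject bitmap lengths above 32 before it ever builds the mask. A standalone sketch of the bound check (hypothetical helper, not the kernel decoder):

#include <stdio.h>
#include <stdint.h>

/* Build a mask of 'len' leading bits; reject out-of-range lengths the
 * same way the patch makes decode_seq() return H323_ERROR_RANGE.
 */
static int get_bitmap_mask(unsigned int len, uint32_t *mask)
{
        if (len > 32)
                return -1;      /* the shift below would be undefined */
        *mask = len ? 0xFFFFFFFFu << (32 - len) : 0;
        return 0;
}

int main(void)
{
        unsigned int lens[] = { 7, 32, 75 };
        uint32_t mask;

        for (unsigned int i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
                if (get_bitmap_mask(lens[i], &mask))
                        printf("len=%u rejected\n", lens[i]);
                else
                        printf("len=%u mask=0x%08x\n", lens[i],
                               (unsigned int)mask);
        }
        return 0;
}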
Fixes: 5e35941d9901 ("[NETFILTER]: Add H.323 conntrack/NAT helper") Signed-off-by: Lena Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_h323_asn1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index e697a824b001..540d97715bd2 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Get fields bitmap */ if (nf_h323_error_boundary(bs, 0, f->sz)) return H323_ERROR_BOUND; + if (f->sz > 32) + return H323_ERROR_RANGE; bmp = get_bitmap(bs, f->sz); if (base) *(unsigned int *)base = bmp; @@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, bmp2_len = get_bits(bs, 7) + 1; if (nf_h323_error_boundary(bs, 0, bmp2_len)) return H323_ERROR_BOUND; + if (bmp2_len > 32) + return H323_ERROR_RANGE; bmp2 = get_bitmap(bs, bmp2_len); bmp |= bmp2 >> f->sz; if (base) From e041df5dc9e68adffcba5499ca28e1252bed6f4b Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:35 +0800 Subject: [PATCH 18/71] netrom: Fix a data-race around sysctl_netrom_default_path_quality [ Upstream commit 958d6145a6d9ba9e075c921aead8753fb91c9101 ] We need to protect the reader reading sysctl_netrom_default_path_quality because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 78da5eab252a..94c4d4554b9c 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; - nr_neigh->quality = sysctl_netrom_default_path_quality; + nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; From 18c95d11c347a12e5c31df1325cef6b995d14ecf Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:36 +0800 Subject: [PATCH 19/71] netrom: Fix a data-race around sysctl_netrom_obsolescence_count_initialiser [ Upstream commit cfd9f4a740f772298308b2e6070d2c744fb5cf79 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. 
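This and the neighbouring netrom patches all apply the same idiom: a sysctl that another context may rewrite at any moment is read exactly once through READ_ONCE(), so the compiler can neither tear the load nor re-read a changed value mid-function. A userspace analogue using a C11 relaxed atomic in place of the kernel's READ_ONCE()/WRITE_ONCE() (illustrative only, not the kernel code):

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for a sysctl that another thread may rewrite concurrently. */
static _Atomic int netrom_ttl = 16;

static void build_header(unsigned char *ttl_field)
{
        /* Read the tunable exactly once; reusing 'ttl' keeps the value
         * consistent even if a writer changes it mid-function.
         */
        int ttl = atomic_load_explicit(&netrom_ttl, memory_order_relaxed);

        *ttl_field = (unsigned char)ttl;
}

int main(void)
{
        unsigned char field;

        build_header(&field);
        printf("ttl=%u\n", (unsigned)field);

        atomic_store_explicit(&netrom_ttl, 64, memory_order_relaxed);
        build_header(&field);
        printf("ttl=%u\n", (unsigned)field);
        return 0;
}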
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 94c4d4554b9c..8a5375e1dcc7 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -763,7 +763,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, - sysctl_netrom_obsolescence_count_initialiser); + READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } From d1261bde59a3a087ab0c81181821e194278d9264 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:37 +0800 Subject: [PATCH 20/71] netrom: Fix data-races around sysctl_netrom_network_ttl_initialiser [ Upstream commit 119cae5ea3f9e35cdada8e572cc067f072fa825a ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_dev.c | 2 +- net/netrom/nr_out.c | 2 +- net/netrom/nr_subr.c | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 29e418c8c6c3..4caee8754b79 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev, buff[6] |= AX25_SSSID_SPARE; buff += AX25_ADDR_LEN; - *buff++ = sysctl_netrom_network_ttl_initialiser; + *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); *buff++ = NR_PROTO_IP; *buff++ = NR_PROTO_IP; diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 44929657f5b7..5e531394a724 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index e2d2af924cff..c3bbd5880850 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype) *dptr++ = nr->my_id; *dptr++ = frametype; *dptr++ = nr->window; - if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser; + if (nr->bpqext) + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); break; case NR_DISCREQ: @@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags) dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; - *dptr++ = sysctl_netrom_network_ttl_initialiser; + *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (mine) { *dptr++ = 0; From 291d36d772f5ea5c68a263ee440f2c9eade371c9 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:38 +0800 Subject: [PATCH 21/71] netrom: Fix a data-race around sysctl_netrom_transport_timeout [ Upstream commit 60a7a152abd494ed4f69098cf0f322e6bb140612 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 24747163122b..685751096744 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -453,7 +453,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr_init_timers(sk); nr->t1 = - msecs_to_jiffies(sysctl_netrom_transport_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = From 34a164d2448264b62af82bc0af3d2c83d12d38ac Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:39 +0800 Subject: [PATCH 22/71] netrom: Fix a data-race around sysctl_netrom_transport_maximum_tries [ Upstream commit e799299aafed417cc1f32adccb2a0e5268b3f6d5 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 685751096744..678a7bbd84bf 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -457,7 +457,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t2 = msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); nr->n2 = - msecs_to_jiffies(sysctl_netrom_transport_maximum_tries); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = msecs_to_jiffies(sysctl_netrom_transport_busy_delay); nr->idle = From 34c84e0036a60e7e50ae50b42ed194d8daef8cc9 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:40 +0800 Subject: [PATCH 23/71] netrom: Fix a data-race around sysctl_netrom_transport_acknowledge_delay [ Upstream commit 806f462ba9029d41aadf8ec93f2f99c5305deada ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 678a7bbd84bf..5a5cca18ae0c 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -455,7 +455,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t1 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout)); nr->t2 = - msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay)); nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = From f3315a6edaec12b461031eab8c98c78111a41f95 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:41 +0800 Subject: [PATCH 24/71] netrom: Fix a data-race around sysctl_netrom_transport_busy_delay [ Upstream commit 43547d8699439a67b78d6bb39015113f7aa360fd ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5a5cca18ae0c..76d66eb0de25 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -459,7 +459,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->n2 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries)); nr->t4 = - msecs_to_jiffies(sysctl_netrom_transport_busy_delay); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); nr->window = sysctl_netrom_transport_requested_window_size; From 652b0b35819610a42b8a90d21acb12f69943b397 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:42 +0800 Subject: [PATCH 25/71] netrom: Fix a data-race around sysctl_netrom_transport_requested_window_size [ Upstream commit a2e706841488f474c06e9b33f71afc947fb3bf56 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 76d66eb0de25..d780adf54e19 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -462,7 +462,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); - nr->window = sysctl_netrom_transport_requested_window_size; + nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; nr->state = NR_STATE_0; From 01d4e3afe257768cd2a45f15a0e57bacf932b140 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:43 +0800 Subject: [PATCH 26/71] netrom: Fix a data-race around sysctl_netrom_transport_no_activity_timeout [ Upstream commit f99b494b40431f0ca416859f2345746199398e2b ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d780adf54e19..376b6af43144 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -461,7 +461,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol, nr->t4 = msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay)); nr->idle = - msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout); + msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout)); nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size); nr->bpqext = 1; From b7d33e083f9d5d39445c0a91e7ad4f3e2c47fcb5 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:44 +0800 Subject: [PATCH 27/71] netrom: Fix a data-race around sysctl_netrom_routing_control [ Upstream commit b5dffcb8f71bdd02a4e5799985b51b12f4eeaf76 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 8a5375e1dcc7..88c59bd0f739 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -777,7 +777,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) return ret; } - if (!sysctl_netrom_routing_control && ax25 != NULL) + if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ From cfe0f73fb38a01bce86fe15ef5f750f850f7d3fe Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:45 +0800 Subject: [PATCH 28/71] netrom: Fix a data-race around sysctl_netrom_link_fails_count [ Upstream commit bc76645ebdd01be9b9994dac39685a3d0f6f7985 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/nr_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 88c59bd0f739..895702337c92 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -725,7 +725,7 @@ void nr_link_failed(ax25_cb *ax25, int reason) nr_neigh->ax25 = NULL; ax25_cb_put(ax25); - if (++nr_neigh->failed < sysctl_netrom_link_fails_count) { + if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } From bbf950a6e96a91cf8cf0c71117b94ed3fafc9dd3 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Mon, 4 Mar 2024 16:20:46 +0800 Subject: [PATCH 29/71] netrom: Fix data-races around sysctl_net_busy_read [ Upstream commit d380ce70058a4ccddc3e5f5c2063165dc07672c6 ] We need to protect the reader reading the sysctl value because the value can be changed concurrently. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jason Xing Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 376b6af43144..37d0bf6cab45 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) * G8PZT's Xrouter which is sending packets with command type 7 * as an extension of the protocol. 
*/ - if (sysctl_netrom_reset_circuit && + if (READ_ONCE(sysctl_netrom_reset_circuit) && (frametype != NR_RESET || flags != 0)) nr_transmit_reset(skb, 1); diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 69e58906c32b..034f79d11ae1 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; @@ -263,7 +263,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype break; case NR_RESET: - if (sysctl_netrom_reset_circuit) + if (READ_ONCE(sysctl_netrom_reset_circuit)) nr_disconnect(sk, ECONNRESET); break; From 1dee72c02170a446d72df8ddec8c3b6936f9f3e8 Mon Sep 17 00:00:00 2001 From: Muhammad Usama Anjum Date: Tue, 16 Jan 2024 14:04:54 +0500 Subject: [PATCH 30/71] selftests/mm: switch to bash from sh [ Upstream commit bc29036e1da1cf66e5f8312649aeec2d51ea3d86 ] Running charge_reserved_hugetlb.sh generates errors if sh is set to dash: ./charge_reserved_hugetlb.sh: 9: [[: not found ./charge_reserved_hugetlb.sh: 19: [[: not found ./charge_reserved_hugetlb.sh: 27: [[: not found ./charge_reserved_hugetlb.sh: 37: [[: not found ./charge_reserved_hugetlb.sh: 45: Syntax error: "(" unexpected Switch to using /bin/bash instead of /bin/sh. Make the switch for write_hugetlb_memory.sh as well which is called from charge_reserved_hugetlb.sh. Link: https://lkml.kernel.org/r/20240116090455.3407378-1-usama.anjum@collabora.com Signed-off-by: Muhammad Usama Anjum Cc: Muhammad Usama Anjum Cc: Shuah Khan Cc: David Laight Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 2 +- tools/testing/selftests/vm/write_hugetlb_memory.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index 7536ff2f890a..d0107f8ae621 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index 70a02301f4c2..3d2d2eb9d6ff 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e From c9c3cc6a13bddc76bb533ad8147a5528cac5ba5a Mon Sep 17 00:00:00 2001 From: Nico Pache Date: Fri, 19 Jan 2024 06:14:29 -0700 Subject: [PATCH 31/71] selftests: mm: fix map_hugetlb failure on 64K page size systems [ Upstream commit 91b80cc5b39f00399e8e2d17527cad2c7fa535e2 ] On systems with 64k page size and 512M huge page sizes, the allocation and test succeeds but errors out at the munmap. As the comment states, munmap will failure if its not HUGEPAGE aligned. This is due to the length of the mapping being 1/2 the size of the hugepage causing the munmap to not be hugepage aligned. Fix this by making the mapping length the full hugepage if the hugepage is larger than the length of the mapping. 
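The rule being enforced is simply that lengths passed to mmap()/munmap() for MAP_HUGETLB mappings must cover whole huge pages. A small helper expressing the general form of the fix (hypothetical; the selftest itself just bumps the length up to one huge page):

#include <stdio.h>

/* Round a mapping length up to a multiple of the huge page size so a
 * later munmap() of the same length stays hugepage aligned.
 */
static unsigned long align_up(unsigned long len, unsigned long hugepage_size)
{
        unsigned long rem = len % hugepage_size;

        return rem ? len + hugepage_size - rem : len;
}

int main(void)
{
        unsigned long hp = 512UL << 20; /* 512M huge pages on 64K kernels */

        printf("%lu -> %lu\n", 256UL << 20, align_up(256UL << 20, hp));
        printf("%lu -> %lu\n", 600UL << 20, align_up(600UL << 20, hp));
        return 0;
}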
Link: https://lkml.kernel.org/r/20240119131429.172448-1-npache@redhat.com Signed-off-by: Nico Pache Cc: Donet Tom Cc: Shuah Khan Cc: Christophe Leroy Cc: Michael Ellerman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- tools/testing/selftests/vm/map_hugetlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 312889edb84a..c65c55b7a789 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -15,6 +15,7 @@ #include #include #include +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -70,10 +71,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { From 9c398afd493086b29c62fb0f45e53a2e07cfc199 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Mar 2021 14:02:37 +0100 Subject: [PATCH 32/71] um: allow not setting extra rpaths in the linux binary [ Upstream commit 386093c68ba3e8bcfe7f46deba901e0e80713c29 ] There doesn't seem to be any reason for the rpath being set in the binaries, at on systems that I tested on. On the other hand, setting rpath is actually harming binaries in some cases, e.g. if using nix-based compilation environments where /lib & /lib64 are not part of the actual environment. Add a new Kconfig option (under EXPERT, for less user confusion) that allows disabling the rpath additions. Signed-off-by: Johannes Berg Signed-off-by: Richard Weinberger Stable-dep-of: 846cfbeed09b ("um: Fix adding '-no-pie' for clang") Signed-off-by: Sasha Levin --- arch/um/Kconfig | 13 +++++++++++++ arch/um/Makefile | 3 ++- arch/x86/Makefile.um | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index eb1c6880bde4..20264b47dcff 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -92,6 +92,19 @@ config LD_SCRIPT_DYN depends on !LD_SCRIPT_STATIC select MODULE_REL_CRCS if MODVERSIONS +config LD_SCRIPT_DYN_RPATH + bool "set rpath in the binary" if EXPERT + default y + depends on LD_SCRIPT_DYN + help + Add /lib (and /lib64 for 64-bit) to the linux binary's rpath + explicitly. + + You may need to turn this off if compiling for nix systems + that have their libraries in random /nix directories and + might otherwise unexpected use libraries from /lib or /lib64 + instead of the desired ones. 
+ config HOSTFS tristate "Host filesystem" help diff --git a/arch/um/Makefile b/arch/um/Makefile index 56e5320da762..4211e23a2f68 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -118,7 +118,8 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib $(call cc-option, -no-pie) +LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ -fno-stack-protector $(call cc-option, -fno-stack-protector-all) diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 1db7913795f5..b3c1ae084180 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -44,7 +44,7 @@ ELF_FORMAT := elf64-x86-64 # Not on all 64-bit distros /lib is a symlink to /lib64. PLD is an example. -LINK-$(CONFIG_LD_SCRIPT_DYN) += -Wl,-rpath,/lib64 +LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib64 LINK-y += -m64 endif From 89ed7ebae4f04d05678108a2141b7ddaea7f9355 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 29 Jan 2021 15:00:28 +0200 Subject: [PATCH 33/71] xhci: remove extra loop in interrupt context [ Upstream commit 55f6153d8cc8eff0852d108f80087fdf41dc2169 ] When finishing a TD we walk the endpoint dequeue trb pointer until it matches the last TRB of the TD. TDs can contain over 100 TRBs, meaning we call a function 100 times, do a few comparisons and increase a couple values for each of these calls, all in interrupt context. This can all be avoided by adding a pointer to the last TRB segment, and a number of TRBs in the TD. So instead of walking through each TRB just set the new dequeue segment, pointer, and number of free TRBs directly. Getting rid of the while loop also reduces the risk of getting stuck in a infinite loop in the interrupt handler. Loop relied on valid matching dequeue and last_trb values to break. 
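As a standalone sketch of the idea (generic names, not the xhci structures): once a TD caches the segment of its last TRB and its TRB count at queue time, the dequeue state can be advanced in constant time instead of stepping TRB by TRB in interrupt context.

struct demo_seg;			/* opaque segment type for the sketch */

struct demo_ring {
	struct demo_seg *deq_seg;
	void *dequeue;
	unsigned int num_trbs_free;
};

struct demo_td {
	struct demo_seg *last_trb_seg;	/* cached when the TD is queued */
	void *last_trb;
	unsigned int num_trbs;		/* counted while queuing TRBs */
};

/* O(1) replacement for "while (ring->dequeue != td->last_trb) inc_deq();" */
static void demo_advance_past_td(struct demo_ring *ring, const struct demo_td *td)
{
	ring->dequeue = td->last_trb;
	ring->deq_seg = td->last_trb_seg;
	ring->num_trbs_free += td->num_trbs - 1;
	/* the caller still performs one final increment, as inc_deq() does */
}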
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210129130044.206855-12-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 21 ++++++++++++++------- drivers/usb/host/xhci.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index eb70f07e3623..b814dc07116d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2099,8 +2099,9 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, EP_HARD_RESET); } else { /* Update ring dequeue pointer */ - while (ep_ring->dequeue != td->last_trb) - inc_deq(xhci, ep_ring); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + ep_ring->num_trbs_free += td->num_trbs - 1; inc_deq(xhci, ep_ring); } @@ -2321,8 +2322,9 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, frame->actual_length = 0; /* Update ring dequeue pointer */ - while (ep->ring->dequeue != td->last_trb) - inc_deq(xhci, ep->ring); + ep->ring->dequeue = td->last_trb; + ep->ring->deq_seg = td->last_trb_seg; + ep->ring->num_trbs_free += td->num_trbs - 1; inc_deq(xhci, ep->ring); return xhci_td_cleanup(xhci, td, ep->ring, status); @@ -3487,7 +3489,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, field |= TRB_IOC; more_trbs_coming = false; td->last_trb = ring->enqueue; - + td->last_trb_seg = ring->enq_seg; if (xhci_urb_suitable_for_idt(urb)) { memcpy(&send_addr, urb->transfer_buffer, trb_buff_len); @@ -3513,7 +3515,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, upper_32_bits(send_addr), length_field, field); - + td->num_trbs++; addr += trb_buff_len; sent_len = trb_buff_len; @@ -3537,8 +3539,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, ep_index, urb->stream_id, 1, urb, 1, mem_flags); urb_priv->td[1].last_trb = ring->enqueue; + urb_priv->td[1].last_trb_seg = ring->enq_seg; field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC; queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field); + urb_priv->td[1].num_trbs++; } check_trb_math(urb, enqd_len); @@ -3589,6 +3593,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, urb_priv = urb->hcpriv; td = &urb_priv->td[0]; + td->num_trbs = num_trbs; /* * Don't give the first TRB to the hardware (by toggling the cycle bit) @@ -3661,6 +3666,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Save the DMA address of the last TRB in the TD */ td->last_trb = ep_ring->enqueue; + td->last_trb_seg = ep_ring->enq_seg; /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */ /* If the device sent data, the status stage is an OUT transfer */ @@ -3905,7 +3911,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, goto cleanup; } td = &urb_priv->td[i]; - + td->num_trbs = trbs_per_td; /* use SIA as default, if frame id is used overwrite it */ sia_frame_id = TRB_SIA; if (!(urb->transfer_flags & URB_ISO_ASAP) && @@ -3948,6 +3954,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, } else { more_trbs_coming = false; td->last_trb = ep_ring->enqueue; + td->last_trb_seg = ep_ring->enq_seg; field |= TRB_IOC; if (trb_block_event_intr(xhci, num_tds, i)) field |= TRB_BEI; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index bb3c362a194b..85ab213c7940 100644 --- 
a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1550,9 +1550,11 @@ struct xhci_td { struct xhci_segment *start_seg; union xhci_trb *first_trb; union xhci_trb *last_trb; + struct xhci_segment *last_trb_seg; struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + unsigned int num_trbs; }; /* xHCI command default timeout value */ From fa5aaf31e5f5aa6e57a29037b5fd6e54369f83b8 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 6 Apr 2021 10:02:08 +0300 Subject: [PATCH 34/71] xhci: prevent double-fetch of transfer and transfer event TRBs [ Upstream commit e9fcb07704fcef6fa6d0333fd2b3a62442eaf45b ] The same values are parsed several times from transfer and event TRBs by different functions in the same call path, all while processing one transfer event. As the TRBs are in DMA memory and can be accessed by the xHC host we want to avoid this to prevent double-fetch issues. To resolve this pass the already parsed values to the different functions in the path of parsing a transfer event Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210406070208.3406266-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 42 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b814dc07116d..62d92da7016e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2059,16 +2059,13 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code) return 0; } -static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, - struct xhci_transfer_event *event, struct xhci_virt_ep *ep) +static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + u32 trb_comp_code) { struct xhci_ep_ctx *ep_ctx; - struct xhci_ring *ep_ring; - u32 trb_comp_code; - ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index); - trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID || trb_comp_code == COMP_STOPPED || @@ -2126,9 +2123,9 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring, /* * Process control tds, update urb status and actual_length. */ -static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct xhci_ep_ctx *ep_ctx; u32 trb_comp_code; @@ -2216,15 +2213,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->actual_length = requested; finish_td: - return finish_td(xhci, td, event, ep); + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } /* * Process isochronous tds, update urb packet status and actual_length. 
*/ -static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct urb_priv *urb_priv; int idx; @@ -2301,7 +2298,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->actual_length += frame->actual_length; - return finish_td(xhci, td, event, ep); + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, @@ -2333,17 +2330,15 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, /* * Process bulk and interrupt tds, update urb status and actual_length. */ -static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct xhci_slot_ctx *slot_ctx; - struct xhci_ring *ep_ring; u32 trb_comp_code; u32 remaining, requested, ep_trb_len; slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx); - ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); ep_trb_len = TRB_LEN(le32_to_cpu(ep_trb->generic.field[2])); @@ -2403,7 +2398,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, remaining); td->urb->actual_length = 0; } - return finish_td(xhci, td, event, ep); + + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } /* @@ -2754,11 +2750,11 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* update the urb's actual_length and give back to the core */ if (usb_endpoint_xfer_control(&td->urb->ep->desc)) - process_ctrl_td(xhci, td, ep_trb, event, ep); + process_ctrl_td(xhci, ep, ep_ring, td, ep_trb, event); else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc)) - process_isoc_td(xhci, td, ep_trb, event, ep); + process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else - process_bulk_intr_td(xhci, td, ep_trb, event, ep); + process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); cleanup: handling_skipped_tds = ep->skip && trb_comp_code != COMP_MISSED_SERVICE_ERROR && From fe2322caa07424b31522761c27f8b299e87a37a9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 35/71] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] The last TRB of a isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host After an error mid a multi-TRB TD the xHC should according to xhci 4.9.1 generate events for passed TRBs with IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event. If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while xHC host is already sending events for the next TD in the list. 
This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and event points to the next TD. In that case give back the fist TD and process the next TD normally Make sure TD status and transferred length stay valid in both cases with and without final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 62d92da7016e..883cf477a70b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2244,6 +2244,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2269,8 +2272,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2290,6 +2294,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2298,6 +2305,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2684,17 +2699,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. 
+ * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2706,9 +2755,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 85ab213c7940..5a8443f6ed70 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1554,6 +1554,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; From 696e4112e5c1ee61996198f0ebb6ca3fab55166e Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 36/71] xhci: handle isoc Babble and Buffer Overrun events properly [ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. 
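A reduced sketch of the resulting completion handling (invented completion codes and flag names; fallthrough is the kernel's explicit fall-through marker): a Babble error cannot be trusted to report a valid length, so it shares the summed-length and mid-TD bookkeeping of a transaction error while keeping its own -EOVERFLOW frame status.

#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Illustrative only: mirrors the shape of the handling, not the real codes. */
static void demo_handle_isoc_comp(u32 comp, bool last_trb, int *frame_status,
				  bool *sum_trb_lengths, bool *error_mid_td)
{
	switch (comp) {
	case 1:	/* made-up code: babble detected */
		*sum_trb_lengths = true;
		fallthrough;
	case 2:	/* made-up code: isoch buffer overrun */
		*frame_status = -EOVERFLOW;
		if (!last_trb)
			*error_mid_td = true;
		break;
	default:
		break;
	}
}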
Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 883cf477a70b..4fa387e447f0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2262,9 +2262,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: From 569154b29a24587b19dfbd59374609afcf037784 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Oct 2020 11:46:35 +0300 Subject: [PATCH 37/71] serial: max310x: Use devm_clk_get_optional() to get the input clock [ Upstream commit 974e454d6f96da0c0ab1b4115b92587dd9406f6a ] Simplify the code which fetches the input clock by using devm_clk_get_optional(). If no input clock is present devm_clk_get_optional() will return NULL instead of an error which matches the behavior of the old code. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201007084635.594991-2-andy.shevchenko@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 14537878f985..8bf3c5ab5943 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1265,7 +1265,6 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, struct regmap *regmap, int irq) { int i, ret, fmin, fmax, freq, uartclk; - struct clk *clk_osc, *clk_xtal; struct max310x_port *s; bool xtal = false; @@ -1279,23 +1278,24 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, return -ENOMEM; } - clk_osc = devm_clk_get(dev, "osc"); - clk_xtal = devm_clk_get(dev, "xtal"); - if (!IS_ERR(clk_osc)) { - s->clk = clk_osc; + s->clk = devm_clk_get_optional(dev, "osc"); + if (IS_ERR(s->clk)) + return PTR_ERR(s->clk); + if (s->clk) { fmin = 500000; fmax = 35000000; - } else if (!IS_ERR(clk_xtal)) { - s->clk = clk_xtal; - fmin = 1000000; - fmax = 4000000; - xtal = true; - } else if (PTR_ERR(clk_osc) == -EPROBE_DEFER || - PTR_ERR(clk_xtal) == -EPROBE_DEFER) { - return -EPROBE_DEFER; } else { - dev_err(dev, "Cannot get clock\n"); - return -EINVAL; + s->clk = devm_clk_get_optional(dev, "xtal"); + if (IS_ERR(s->clk)) + return PTR_ERR(s->clk); + if (s->clk) { + fmin = 1000000; + fmax = 4000000; + xtal = true; + } else { + dev_err(dev, "Cannot get clock\n"); + return -EINVAL; + } } ret = clk_prepare_enable(s->clk); From c2b9cbf09e59331f1ab04e9a4c29a1feefad18ac Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 May 2021 20:29:30 +0300 Subject: [PATCH 38/71] serial: max310x: Try to get crystal clock rate from property [ Upstream commit d4d6f03c4fb3a91dadfe147b47edd40e4d7e4d36 ] In some configurations, mainly ACPI-based, the clock 
frequency of the device is supplied by very well established 'clock-frequency' property. Hence, try to get it from the property at last if no other providers are available. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210517172930.83353-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 8afa6c6decea ("serial: max310x: fail probe if clock crystal is unstable") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 8bf3c5ab5943..0e0f778d75cd 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -629,7 +629,7 @@ static int max310x_set_ref_clk(struct device *dev, struct max310x_port *s, dev_warn(dev, "clock is not stable yet\n"); } - return (int)bestfreq; + return bestfreq; } static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len) @@ -1264,9 +1264,10 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, struct regmap *regmap, int irq) { - int i, ret, fmin, fmax, freq, uartclk; + int i, ret, fmin, fmax, freq; struct max310x_port *s; - bool xtal = false; + u32 uartclk = 0; + bool xtal; if (IS_ERR(regmap)) return PTR_ERR(regmap); @@ -1278,24 +1279,20 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, return -ENOMEM; } + /* Always ask for fixed clock rate from a property. */ + device_property_read_u32(dev, "clock-frequency", &uartclk); + s->clk = devm_clk_get_optional(dev, "osc"); if (IS_ERR(s->clk)) return PTR_ERR(s->clk); if (s->clk) { - fmin = 500000; - fmax = 35000000; + xtal = false; } else { s->clk = devm_clk_get_optional(dev, "xtal"); if (IS_ERR(s->clk)) return PTR_ERR(s->clk); - if (s->clk) { - fmin = 1000000; - fmax = 4000000; - xtal = true; - } else { - dev_err(dev, "Cannot get clock\n"); - return -EINVAL; - } + + xtal = true; } ret = clk_prepare_enable(s->clk); @@ -1303,6 +1300,21 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, return ret; freq = clk_get_rate(s->clk); + if (freq == 0) + freq = uartclk; + if (freq == 0) { + dev_err(dev, "Cannot get clock rate\n"); + return -EINVAL; + } + + if (xtal) { + fmin = 1000000; + fmax = 4000000; + } else { + fmin = 500000; + fmax = 35000000; + } + /* Check frequency limits */ if (freq < fmin || freq > fmax) { ret = -ERANGE; From c7e9e6d5ee848c6ed14731e5024ba0e394a998a2 Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:00 -0500 Subject: [PATCH 39/71] serial: max310x: fail probe if clock crystal is unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8afa6c6decea37e7cb473d2c60473f37f46cea35 ] A stable clock is really required in order to use this UART, so log an error message and bail out if the chip reports that the clock is not stable. 
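The bail-out pattern, having the clock helper return a negative errno via dev_err_probe() and letting probe simply propagate it, can be sketched as follows; every name except dev_err_probe() is invented for the example.

#include <linux/device.h>
#include <linux/errno.h>

/* Illustrative: a helper that must produce a stable reference clock.
 * On failure it logs a probe error once and returns a negative errno. */
static s32 demo_set_ref_clk(struct device *dev, bool stable)
{
	if (!stable)
		return dev_err_probe(dev, -EAGAIN, "clock is not stable\n");

	return 1843200;		/* made-up reference clock rate in Hz */
}

static int demo_probe(struct device *dev)
{
	s32 uartclk = demo_set_ref_clk(dev, false);

	if (uartclk < 0)
		return uartclk;	/* fail probe instead of running with a bad clock */
	return 0;
}

This is also why the patch changes the helper's return type to a signed value: a negative errno has to fit alongside the clock rate.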
Fixes: 4cf9a888fd3c ("serial: max310x: Check the clock readiness") Cc: stable@vger.kernel.org Suggested-by: Jan Kundrát Link: https://www.spinics.net/lists/linux-serial/msg35773.html Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-4-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0e0f778d75cd..bbf45c062668 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -556,7 +556,7 @@ static int max310x_update_best_err(unsigned long f, long *besterr) return 1; } -static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, +static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, unsigned long freq, bool xtal) { unsigned int div, clksrc, pllcfg = 0; @@ -626,7 +626,8 @@ static u32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, } while (!stable && (++try < MAX310X_XTAL_WAIT_RETRIES)); if (!stable) - dev_warn(dev, "clock is not stable yet\n"); + return dev_err_probe(dev, -EAGAIN, + "clock is not stable\n"); } return bestfreq; @@ -1266,7 +1267,7 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, { int i, ret, fmin, fmax, freq; struct max310x_port *s; - u32 uartclk = 0; + s32 uartclk = 0; bool xtal; if (IS_ERR(regmap)) @@ -1350,6 +1351,11 @@ static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); + if (uartclk < 0) { + ret = uartclk; + goto out_uart; + } + dev_dbg(dev, "Reference clock set to %i Hz\n", uartclk); for (i = 0; i < devtype->nr; i++) { From 32e32ab1da5a775f0172f0cfe2225231e59357e6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Oct 2020 11:46:34 +0300 Subject: [PATCH 40/71] serial: max310x: Make use of device properties [ Upstream commit c808fab604ca62cff19ee6b261211483830807aa ] Device property API allows to gather device resources from different sources, such as ACPI. Convert the drivers to unleash the power of device property API. 
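A minimal sketch of the device-property style for an SPI driver (the demo_* names are made up; device_get_match_data() and spi_get_device_id() are the interfaces the patch switches to): the match data is fetched generically, so it works for DT and ACPI alike, with the SPI id table as fallback.

#include <linux/errno.h>
#include <linux/property.h>
#include <linux/spi/spi.h>

struct demo_devtype {
	unsigned int nr;	/* made-up per-variant data */
};

/* Illustrative probe: no of_match_device()/of_node checks needed. */
static int demo_spi_probe(struct spi_device *spi)
{
	const struct demo_devtype *devtype;

	devtype = device_get_match_data(&spi->dev);
	if (!devtype)
		devtype = (const struct demo_devtype *)
			  spi_get_device_id(spi)->driver_data;
	if (!devtype)
		return -ENODEV;

	/* ... size resources from devtype->nr ... */
	return 0;
}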
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201007084635.594991-1-andy.shevchenko@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index bbf45c062668..8d42c537ee5e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -15,8 +15,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -271,7 +271,7 @@ struct max310x_one { container_of(_port, struct max310x_one, port) struct max310x_port { - struct max310x_devtype *devtype; + const struct max310x_devtype *devtype; struct regmap *regmap; struct clk *clk; #ifdef CONFIG_GPIOLIB @@ -1262,7 +1262,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, } #endif -static int max310x_probe(struct device *dev, struct max310x_devtype *devtype, +static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, struct regmap *regmap, int irq) { int i, ret, fmin, fmax, freq; @@ -1488,7 +1488,7 @@ static struct regmap_config regcfg = { #ifdef CONFIG_SPI_MASTER static int max310x_spi_probe(struct spi_device *spi) { - struct max310x_devtype *devtype; + const struct max310x_devtype *devtype; struct regmap *regmap; int ret; @@ -1500,18 +1500,9 @@ static int max310x_spi_probe(struct spi_device *spi) if (ret) return ret; - if (spi->dev.of_node) { - const struct of_device_id *of_id = - of_match_device(max310x_dt_ids, &spi->dev); - if (!of_id) - return -ENODEV; - - devtype = (struct max310x_devtype *)of_id->data; - } else { - const struct spi_device_id *id_entry = spi_get_device_id(spi); - - devtype = (struct max310x_devtype *)id_entry->driver_data; - } + devtype = device_get_match_data(&spi->dev); + if (!devtype) + devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data; regcfg.max_register = devtype->nr * 0x20 - 1; regmap = devm_regmap_init_spi(spi, ®cfg); @@ -1536,7 +1527,7 @@ MODULE_DEVICE_TABLE(spi, max310x_id_table); static struct spi_driver max310x_spi_driver = { .driver = { .name = MAX310X_NAME, - .of_match_table = of_match_ptr(max310x_dt_ids), + .of_match_table = max310x_dt_ids, .pm = &max310x_pm_ops, }, .probe = max310x_spi_probe, From c1ecaadbcd4047c80a978f417d9c0cfbd8427d95 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Sun, 5 Jun 2022 17:46:56 +0300 Subject: [PATCH 41/71] serial: max310x: use regmap methods for SPI batch operations [ Upstream commit 285e76fc049c4d32c772eea9460a7ef28a193802 ] The SPI batch read/write operations can be implemented as simple regmap raw read and write, which will also try to do a gather write just as it is done here. Use the regmap raw read and write methods. 
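A reduced sketch of the replacement (the register offsets are hypothetical; regmap_raw_write() and regmap_raw_read() are the regmap calls the patch adopts): a FIFO batch transfer becomes a single raw access, and regmap performs the gather write of command byte plus payload internally.

#include <linux/regmap.h>

#define DEMO_THR_REG	0x00	/* made-up TX FIFO register offset */
#define DEMO_RHR_REG	0x00	/* made-up RX FIFO register offset */

static void demo_batch_write(struct regmap *map, const u8 *txbuf, unsigned int len)
{
	/* no hand-rolled spi_sync_transfer() header buffer needed */
	regmap_raw_write(map, DEMO_THR_REG, txbuf, len);
}

static void demo_batch_read(struct regmap *map, u8 *rxbuf, unsigned int len)
{
	regmap_raw_read(map, DEMO_RHR_REG, rxbuf, len);
}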
Reviewed-by: Andy Shevchenko Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220605144659.4169853-2-demonsingur@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 8d42c537ee5e..c0fa4ad10477 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -263,8 +263,6 @@ struct max310x_one { struct work_struct md_work; struct work_struct rs_work; - u8 wr_header; - u8 rd_header; u8 rx_buf[MAX310X_FIFO_SIZE]; }; #define to_max310x_port(_port) \ @@ -635,32 +633,18 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len) { - struct max310x_one *one = to_max310x_port(port); - struct spi_transfer xfer[] = { - { - .tx_buf = &one->wr_header, - .len = sizeof(one->wr_header), - }, { - .tx_buf = txbuf, - .len = len, - } - }; - spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer)); + struct max310x_port *s = dev_get_drvdata(port->dev); + u8 reg = port->iobase + MAX310X_THR_REG; + + regmap_raw_write(s->regmap, reg, txbuf, len); } static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) { - struct max310x_one *one = to_max310x_port(port); - struct spi_transfer xfer[] = { - { - .tx_buf = &one->rd_header, - .len = sizeof(one->rd_header), - }, { - .rx_buf = rxbuf, - .len = len, - } - }; - spi_sync_transfer(to_spi_device(port->dev), xfer, ARRAY_SIZE(xfer)); + struct max310x_port *s = dev_get_drvdata(port->dev); + u8 reg = port->iobase + MAX310X_RHR_REG; + + regmap_raw_read(s->regmap, reg, rxbuf, len); } static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) @@ -1390,10 +1374,6 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty INIT_WORK(&s->p[i].md_work, max310x_md_proc); /* Initialize queue for changing RS485 mode */ INIT_WORK(&s->p[i].rs_work, max310x_rs_proc); - /* Initialize SPI-transfer buffers */ - s->p[i].wr_header = (s->p[i].port.iobase + MAX310X_THR_REG) | - MAX310X_WRITE_BIT; - s->p[i].rd_header = (s->p[i].port.iobase + MAX310X_RHR_REG); /* Register port */ ret = uart_add_one_port(&max310x_uart, &s->p[i].port); From fe0d16b3a3c3cedc8185c0ed4220b21e08e2e842 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Sun, 5 Jun 2022 17:46:57 +0300 Subject: [PATCH 42/71] serial: max310x: use a separate regmap for each port [ Upstream commit 6ef281daf020592c219fa91780abc381c6c20db5 ] The driver currently does manual register manipulation in multiple places to talk to a specific UART port. In order to talk to a specific UART port over SPI, the bits U1 and U0 of the register address can be set, as explained in the Command byte configuration section of the datasheet. Make this more elegant by creating regmaps for each UART port and setting the read_flag_mask and write_flag_mask accordingly. All communcations regarding global registers are done on UART port 0, so replace the global regmap entirely with the port 0 regmap. Also, remove the 0x1f masks from reg_writeable(), reg_volatile() and reg_precious() methods, since setting the U1 and U0 bits of the register address happens inside the regmap core now. 
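The per-port regmap idea can be sketched like this (the DEMO_* values are invented; read_flag_mask and write_flag_mask are real struct regmap_config fields): the port-select bits are folded into the command byte by the regmap core, so register addresses in the driver no longer need per-port offsets or masking.

#include <linux/regmap.h>
#include <linux/spi/spi.h>

#define DEMO_WRITE_BIT	0x80	/* made-up: write-command flag */

static struct regmap_config demo_regcfg = {
	.reg_bits = 8,
	.val_bits = 8,
	.max_register = 0x1f,
};

/* One regmap per UART port: every access on the returned map targets
 * that port because the port bits live in the flag masks. */
static struct regmap *demo_port_regmap(struct spi_device *spi, unsigned int port)
{
	u8 port_mask = port * 0x20;	/* made-up encoding of the port-select bits */

	demo_regcfg.read_flag_mask = port_mask;
	demo_regcfg.write_flag_mask = port_mask | DEMO_WRITE_BIT;
	return devm_regmap_init_spi(spi, &demo_regcfg);
}

Reusing one config structure in a loop, as the patch does, works because regmap takes its own copy of the settings at init time.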
Reviewed-by: Andy Shevchenko Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220605144659.4169853-3-demonsingur@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: b35f8dbbce81 ("serial: max310x: prevent infinite while() loop in port startup") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 68 +++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index c0fa4ad10477..80298a5714bc 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -262,6 +262,7 @@ struct max310x_one { struct work_struct tx_work; struct work_struct md_work; struct work_struct rs_work; + struct regmap *regmap; u8 rx_buf[MAX310X_FIFO_SIZE]; }; @@ -291,26 +292,26 @@ static DECLARE_BITMAP(max310x_lines, MAX310X_UART_NRMAX); static u8 max310x_port_read(struct uart_port *port, u8 reg) { - struct max310x_port *s = dev_get_drvdata(port->dev); + struct max310x_one *one = to_max310x_port(port); unsigned int val = 0; - regmap_read(s->regmap, port->iobase + reg, &val); + regmap_read(one->regmap, reg, &val); return val; } static void max310x_port_write(struct uart_port *port, u8 reg, u8 val) { - struct max310x_port *s = dev_get_drvdata(port->dev); + struct max310x_one *one = to_max310x_port(port); - regmap_write(s->regmap, port->iobase + reg, val); + regmap_write(one->regmap, reg, val); } static void max310x_port_update(struct uart_port *port, u8 reg, u8 mask, u8 val) { - struct max310x_port *s = dev_get_drvdata(port->dev); + struct max310x_one *one = to_max310x_port(port); - regmap_update_bits(s->regmap, port->iobase + reg, mask, val); + regmap_update_bits(one->regmap, reg, mask, val); } static int max3107_detect(struct device *dev) @@ -449,7 +450,7 @@ static const struct max310x_devtype max14830_devtype = { static bool max310x_reg_writeable(struct device *dev, unsigned int reg) { - switch (reg & 0x1f) { + switch (reg) { case MAX310X_IRQSTS_REG: case MAX310X_LSR_IRQSTS_REG: case MAX310X_SPCHR_IRQSTS_REG: @@ -466,7 +467,7 @@ static bool max310x_reg_writeable(struct device *dev, unsigned int reg) static bool max310x_reg_volatile(struct device *dev, unsigned int reg) { - switch (reg & 0x1f) { + switch (reg) { case MAX310X_RHR_REG: case MAX310X_IRQSTS_REG: case MAX310X_LSR_IRQSTS_REG: @@ -488,7 +489,7 @@ static bool max310x_reg_volatile(struct device *dev, unsigned int reg) static bool max310x_reg_precious(struct device *dev, unsigned int reg) { - switch (reg & 0x1f) { + switch (reg) { case MAX310X_RHR_REG: case MAX310X_IRQSTS_REG: case MAX310X_SPCHR_IRQSTS_REG: @@ -633,18 +634,16 @@ static s32 max310x_set_ref_clk(struct device *dev, struct max310x_port *s, static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int len) { - struct max310x_port *s = dev_get_drvdata(port->dev); - u8 reg = port->iobase + MAX310X_THR_REG; + struct max310x_one *one = to_max310x_port(port); - regmap_raw_write(s->regmap, reg, txbuf, len); + regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len); } static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) { - struct max310x_port *s = dev_get_drvdata(port->dev); - u8 reg = port->iobase + MAX310X_RHR_REG; + struct max310x_one *one = to_max310x_port(port); - regmap_raw_read(s->regmap, reg, rxbuf, len); + regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); } static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) @@ -1247,15 +1246,16 @@ static int 
max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, #endif static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, - struct regmap *regmap, int irq) + struct regmap *regmaps[], int irq) { int i, ret, fmin, fmax, freq; struct max310x_port *s; s32 uartclk = 0; bool xtal; - if (IS_ERR(regmap)) - return PTR_ERR(regmap); + for (i = 0; i < devtype->nr; i++) + if (IS_ERR(regmaps[i])) + return PTR_ERR(regmaps[i]); /* Alloc port structure */ s = devm_kzalloc(dev, struct_size(s, p, devtype->nr), GFP_KERNEL); @@ -1306,7 +1306,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; } - s->regmap = regmap; + s->regmap = regmaps[0]; s->devtype = devtype; dev_set_drvdata(dev, s); @@ -1316,22 +1316,18 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { - unsigned int offs = i << 5; - /* Reset port */ - regmap_write(s->regmap, MAX310X_MODE2_REG + offs, + regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); /* Clear port reset */ - regmap_write(s->regmap, MAX310X_MODE2_REG + offs, 0); + regmap_write(regmaps[i], MAX310X_MODE2_REG, 0); /* Wait for port startup */ do { - regmap_read(s->regmap, - MAX310X_BRGDIVLSB_REG + offs, &ret); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); } while (ret != 0x01); - regmap_write(s->regmap, MAX310X_MODE1_REG + offs, - devtype->mode1); + regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } uartclk = max310x_set_ref_clk(dev, s, freq, xtal); @@ -1359,11 +1355,13 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty s->p[i].port.fifosize = MAX310X_FIFO_SIZE; s->p[i].port.flags = UPF_FIXED_TYPE | UPF_LOW_LATENCY; s->p[i].port.iotype = UPIO_PORT; - s->p[i].port.iobase = i * 0x20; + s->p[i].port.iobase = i; s->p[i].port.membase = (void __iomem *)~0; s->p[i].port.uartclk = uartclk; s->p[i].port.rs485_config = max310x_rs485_config; s->p[i].port.ops = &max310x_ops; + s->p[i].regmap = regmaps[i]; + /* Disable all interrupts */ max310x_port_write(&s->p[i].port, MAX310X_IRQEN_REG, 0); /* Clear IRQ status register */ @@ -1460,6 +1458,7 @@ static struct regmap_config regcfg = { .val_bits = 8, .write_flag_mask = MAX310X_WRITE_BIT, .cache_type = REGCACHE_RBTREE, + .max_register = MAX310X_REG_1F, .writeable_reg = max310x_reg_writeable, .volatile_reg = max310x_reg_volatile, .precious_reg = max310x_reg_precious, @@ -1469,7 +1468,8 @@ static struct regmap_config regcfg = { static int max310x_spi_probe(struct spi_device *spi) { const struct max310x_devtype *devtype; - struct regmap *regmap; + struct regmap *regmaps[4]; + unsigned int i; int ret; /* Setup SPI bus */ @@ -1484,10 +1484,14 @@ static int max310x_spi_probe(struct spi_device *spi) if (!devtype) devtype = (struct max310x_devtype *)spi_get_device_id(spi)->driver_data; - regcfg.max_register = devtype->nr * 0x20 - 1; - regmap = devm_regmap_init_spi(spi, ®cfg); + for (i = 0; i < devtype->nr; i++) { + u8 port_mask = i * 0x20; + regcfg.read_flag_mask = port_mask; + regcfg.write_flag_mask = port_mask | MAX310X_WRITE_BIT; + regmaps[i] = devm_regmap_init_spi(spi, ®cfg); + } - return max310x_probe(&spi->dev, devtype, regmap, spi->irq); + return max310x_probe(&spi->dev, devtype, regmaps, spi->irq); } static int max310x_spi_remove(struct spi_device *spi) From 518ec3da99f86a7b5b5f41048f51b2079a3c412b Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Tue, 16 Jan 2024 16:30:01 -0500 Subject: [PATCH 43/71] serial: 
max310x: prevent infinite while() loop in port startup [ Upstream commit b35f8dbbce818b02c730dc85133dc7754266e084 ] If there is a problem after resetting a port, the do/while() loop that checks the default value of DIVLSB register may run forever and spam the I2C bus. Add a delay before each read of DIVLSB, and a maximum number of tries to prevent that situation from happening. Also fail probe if port reset is unsuccessful. Fixes: 10d8b34a4217 ("serial: max310x: Driver rework") Cc: stable@vger.kernel.org Signed-off-by: Hugo Villeneuve Link: https://lore.kernel.org/r/20240116213001.3691629-5-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 80298a5714bc..978d9d93127e 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -235,6 +235,10 @@ #define MAX310x_REV_MASK (0xf8) #define MAX310X_WRITE_BIT 0x80 +/* Port startup definitions */ +#define MAX310X_PORT_STARTUP_WAIT_RETRIES 20 /* Number of retries */ +#define MAX310X_PORT_STARTUP_WAIT_DELAY_MS 10 /* Delay between retries */ + /* Crystal-related definitions */ #define MAX310X_XTAL_WAIT_RETRIES 20 /* Number of retries */ #define MAX310X_XTAL_WAIT_DELAY_MS 10 /* Delay between retries */ @@ -1316,6 +1320,9 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty goto out_clk; for (i = 0; i < devtype->nr; i++) { + bool started = false; + unsigned int try = 0, val = 0; + /* Reset port */ regmap_write(regmaps[i], MAX310X_MODE2_REG, MAX310X_MODE2_RST_BIT); @@ -1324,8 +1331,17 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty /* Wait for port startup */ do { - regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &ret); - } while (ret != 0x01); + msleep(MAX310X_PORT_STARTUP_WAIT_DELAY_MS); + regmap_read(regmaps[i], MAX310X_BRGDIVLSB_REG, &val); + + if (val == 0x01) + started = true; + } while (!started && (++try < MAX310X_PORT_STARTUP_WAIT_RETRIES)); + + if (!started) { + ret = dev_err_probe(dev, -EAGAIN, "port reset failed\n"); + goto out_uart; + } regmap_write(regmaps[i], MAX310X_MODE1_REG, devtype->mode1); } From be155e9466dd29cf19601c12ebd3ac9c14edc886 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:27:56 -0700 Subject: [PATCH 44/71] net: Change sock_getsockopt() to take the sk ptr instead of the sock ptr [ Upstream commit ba74a7608dc12fbbd8ea36e660087f08a81ef26a ] A latter patch refactors bpf_getsockopt(SOL_SOCKET) with the sock_getsockopt() to avoid code duplication and code drift between the two duplicates. The current sock_getsockopt() takes sock ptr as the argument. The very first thing of this function is to get back the sk ptr by 'sk = sock->sk'. bpf_getsockopt() could be called when the sk does not have the sock ptr created. Meaning sk->sk_socket is NULL. For example, when a passive tcp connection has just been established but has yet been accept()-ed. Thus, it cannot use the sock_getsockopt(sk->sk_socket) or else it will pass a NULL ptr. This patch moves all sock_getsockopt implementation to the newly added sk_getsockopt(). The new sk_getsockopt() takes a sk ptr and immediately gets the sock ptr by 'sock = sk->sk_socket' The existing sock_getsockopt(sock) is changed to call sk_getsockopt(sock->sk). All existing callers have both sock->sk and sk->sk_socket pointer. 
The latter patch will make bpf_getsockopt(SOL_SOCKET) call sk_getsockopt(sk) directly. The bpf_getsockopt(SOL_SOCKET) does not use the optnames that require sk->sk_socket, so it will be safe. Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002756.2887884-1-kafai@fb.com Signed-off-by: Alexei Starovoitov Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") Signed-off-by: Sasha Levin --- net/core/sock.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 769e969cd1dc..95559d088a16 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1293,10 +1293,10 @@ static int groups_to_user(gid_t __user *dst, const struct group_info *src) return 0; } -int sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) +static int sk_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - struct sock *sk = sock->sk; + struct socket *sock = sk->sk_socket; union { int val; @@ -1633,6 +1633,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, return 0; } +int sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + return sk_getsockopt(sock->sk, level, optname, optval, optlen); +} + /* * Initialize an sk_lock. * From a9482f3b4877b45f5d8d551a1b858fad2bc24e40 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 1 Sep 2022 17:28:02 -0700 Subject: [PATCH 45/71] bpf: net: Change sk_getsockopt() to take the sockptr_t argument [ Upstream commit 4ff09db1b79b98b4a2a7511571c640b76cab3beb ] This patch changes sk_getsockopt() to take the sockptr_t argument such that it can be used by bpf_getsockopt(SOL_SOCKET) in a latter patch. security_socket_getpeersec_stream() is not changed. It stays with the __user ptr (optval.user and optlen.user) to avoid changes to other security hooks. bpf_getsockopt(SOL_SOCKET) also does not support SO_PEERSEC. 
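The sockptr_t abstraction can be illustrated with a small kernel-context sketch (demo_get_option() is invented; copy_to_sockptr(), USER_SOCKPTR() and KERNEL_SOCKPTR() are the real accessors): one option handler serves both a userspace getsockopt() buffer and a kernel buffer such as the one bpf_getsockopt() would pass.

#include <linux/sockptr.h>
#include <linux/errno.h>

/* Illustrative option handler: it never needs to know where the buffers live. */
static int demo_get_option(sockptr_t optval, sockptr_t optlen, int len)
{
	int val = 42;	/* made-up option value */

	if (len < (int)sizeof(val))
		return -EINVAL;
	len = sizeof(val);

	if (copy_to_sockptr(optval, &val, len))
		return -EFAULT;
	if (copy_to_sockptr(optlen, &len, sizeof(len)))
		return -EFAULT;
	return 0;
}

/* Callers wrap their pointers once, e.g.:
 *   demo_get_option(USER_SOCKPTR(uptr), USER_SOCKPTR(ulenp), len);
 *   demo_get_option(KERNEL_SOCKPTR(&kval), KERNEL_SOCKPTR(&klen), len);
 */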
Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220902002802.2888419-1-kafai@fb.com Signed-off-by: Alexei Starovoitov Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") Signed-off-by: Sasha Levin --- include/linux/filter.h | 3 +-- include/linux/sockptr.h | 5 +++++ net/core/filter.c | 5 ++--- net/core/sock.c | 43 +++++++++++++++++++++++------------------ 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index bc6ce4b202a8..cd56e53bd42e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -892,8 +892,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); void sk_reuseport_prog_free(struct bpf_prog *prog); int sk_detach_filter(struct sock *sk); -int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, - unsigned int len); +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len); bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index ea193414298b..38862819e77a 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -64,6 +64,11 @@ static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, return 0; } +static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) +{ + return copy_to_sockptr_offset(dst, 0, src, size); +} + static inline void *memdup_sockptr(sockptr_t src, size_t len) { void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); diff --git a/net/core/filter.c b/net/core/filter.c index 6cfc8fb0562a..49e4d1535cc8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9903,8 +9903,7 @@ int sk_detach_filter(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_detach_filter); -int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, - unsigned int len) +int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len) { struct sock_fprog_kern *fprog; struct sk_filter *filter; @@ -9935,7 +9934,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, goto out; ret = -EFAULT; - if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog))) + if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog))) goto out; /* Instead of bytes, the API requests to return the number diff --git a/net/core/sock.c b/net/core/sock.c index 95559d088a16..42da46965b16 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -644,8 +644,8 @@ static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen) return ret; } -static int sock_getbindtodevice(struct sock *sk, char __user *optval, - int __user *optlen, int len) +static int sock_getbindtodevice(struct sock *sk, sockptr_t optval, + sockptr_t optlen, int len) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -668,12 +668,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval, len = strlen(devname) + 1; ret = -EFAULT; - if (copy_to_user(optval, devname, len)) + if (copy_to_sockptr(optval, devname, len)) goto out; zero: ret = -EFAULT; - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) goto out; ret = 0; @@ -1281,20 +1281,23 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred, } } -static int groups_to_user(gid_t __user *dst, const struct group_info *src) +static int groups_to_user(sockptr_t dst, const struct 
group_info *src) { struct user_namespace *user_ns = current_user_ns(); int i; - for (i = 0; i < src->ngroups; i++) - if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i)) + for (i = 0; i < src->ngroups; i++) { + gid_t gid = from_kgid_munged(user_ns, src->gid[i]); + + if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid))) return -EFAULT; + } return 0; } static int sk_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) + sockptr_t optval, sockptr_t optlen) { struct socket *sock = sk->sk_socket; @@ -1312,7 +1315,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, int lv = sizeof(int); int len; - if (get_user(len, optlen)) + if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; if (len < 0) return -EINVAL; @@ -1445,7 +1448,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); spin_unlock(&sk->sk_peer_lock); - if (copy_to_user(optval, &peercred, len)) + if (copy_to_sockptr(optval, &peercred, len)) return -EFAULT; goto lenout; } @@ -1463,11 +1466,11 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); put_cred(cred); - return put_user(len, optlen) ? -EFAULT : -ERANGE; + return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, cred->group_info); + ret = groups_to_user(optval, cred->group_info); put_cred(cred); if (ret) return ret; @@ -1483,7 +1486,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, return -ENOTCONN; if (lv < len) return -EINVAL; - if (copy_to_user(optval, address, len)) + if (copy_to_sockptr(optval, address, len)) return -EFAULT; goto lenout; } @@ -1500,7 +1503,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_PEERSEC: - return security_socket_getpeersec_stream(sock, optval, optlen, len); + return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len); case SO_MARK: v.val = sk->sk_mark; @@ -1528,7 +1531,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, return sock_getbindtodevice(sk, optval, optlen, len); case SO_GET_FILTER: - len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + len = sk_get_filter(sk, optval, len); if (len < 0) return len; @@ -1575,7 +1578,7 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); - if (copy_to_user(optval, &meminfo, len)) + if (copy_to_sockptr(optval, &meminfo, len)) return -EFAULT; goto lenout; @@ -1625,10 +1628,10 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, if (len > lv) len = lv; - if (copy_to_user(optval, &v, len)) + if (copy_to_sockptr(optval, &v, len)) return -EFAULT; lenout: - if (put_user(len, optlen)) + if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; return 0; } @@ -1636,7 +1639,9 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, int sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { - return sk_getsockopt(sock->sk, level, optname, optval, optlen); + return sk_getsockopt(sock->sk, level, optname, + USER_SOCKPTR(optval), + USER_SOCKPTR(optlen)); } /* From ea6e87db9037f285b3d3d269bb405e0e8dd15d34 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 10 Oct 2022 12:31:21 -0400 Subject: [PATCH 46/71] lsm: 
make security_socket_getpeersec_stream() sockptr_t safe [ Upstream commit b10b9c342f7571f287fd422be5d5c0beb26ba974 ] Commit 4ff09db1b79b ("bpf: net: Change sk_getsockopt() to take the sockptr_t argument") made it possible to call sk_getsockopt() with both user and kernel address space buffers through the use of the sockptr_t type. Unfortunately at the time of conversion the security_socket_getpeersec_stream() LSM hook was written to only accept userspace buffers, and in a desire to avoid having to change the LSM hook the commit author simply passed the sockptr_t's userspace buffer pointer. Since the only sk_getsockopt() callers at the time of conversion which used kernel sockptr_t buffers did not allow SO_PEERSEC, and hence the security_socket_getpeersec_stream() hook, this was acceptable but also very fragile as future changes presented the possibility of silently passing kernel space pointers to the LSM hook. There are several ways to protect against this, including careful code review of future commits, but since relying on code review to catch bugs is a recipe for disaster and the upstream eBPF maintainer is "strongly against defensive programming", this patch updates the LSM hook, and all of the implementations to support sockptr_t and safely handle both user and kernel space buffers. Acked-by: Casey Schaufler Acked-by: John Johansen Signed-off-by: Paul Moore Stable-dep-of: 5a287d3d2b9d ("lsm: fix default return value of the socket_getpeersec_*() hooks") Signed-off-by: Sasha Levin --- include/linux/lsm_hook_defs.h | 2 +- include/linux/lsm_hooks.h | 4 ++-- include/linux/security.h | 11 +++++++---- net/core/sock.c | 3 ++- security/apparmor/lsm.c | 29 +++++++++++++---------------- security/security.c | 6 +++--- security/selinux/hooks.c | 13 ++++++------- security/smack/smack_lsm.c | 19 ++++++++++--------- 8 files changed, 44 insertions(+), 43 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 92a76ce0c382..9f550eab8ebd 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -294,7 +294,7 @@ LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, - char __user *optval, int __user *optlen, unsigned len) + sockptr_t optval, sockptr_t optlen, unsigned int len) LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 64cdf4d7bfb3..bbf9c8c7bd9c 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -926,8 +926,8 @@ * SO_GETPEERSEC. For tcp sockets this can be meaningful if the * socket is associated with an ipsec SA. * @sock is the local socket. - * @optval userspace memory where the security state is to be copied. - * @optlen userspace int where the module should copy the actual length + * @optval memory where the security state is to be copied. + * @optlen memory where the module should copy the actual length * of the security state. * @len as input is the maximum length to copy to userspace provided * by the caller. 
diff --git a/include/linux/security.h b/include/linux/security.h index e388b1666bcf..5b61aa19fac6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -1366,8 +1367,8 @@ int security_socket_getsockopt(struct socket *sock, int level, int optname); int security_socket_setsockopt(struct socket *sock, int level, int optname); int security_socket_shutdown(struct socket *sock, int how); int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb); -int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len); +int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, + sockptr_t optlen, unsigned int len); int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid); int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); @@ -1501,8 +1502,10 @@ static inline int security_sock_rcv_skb(struct sock *sk, return 0; } -static inline int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len) +static inline int security_socket_getpeersec_stream(struct socket *sock, + sockptr_t optval, + sockptr_t optlen, + unsigned int len) { return -ENOPROTOOPT; } diff --git a/net/core/sock.c b/net/core/sock.c index 42da46965b16..016c0b9e01b7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1503,7 +1503,8 @@ static int sk_getsockopt(struct sock *sk, int level, int optname, break; case SO_PEERSEC: - return security_socket_getpeersec_stream(sock, optval.user, optlen.user, len); + return security_socket_getpeersec_stream(sock, + optval, optlen, len); case SO_MARK: v.val = sk->sk_mark; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 585edcc6814d..052f1b920e43 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1070,11 +1070,10 @@ static struct aa_label *sk_peer_label(struct sock *sk) * Note: for tcp only valid if using ipsec or cipso on lan */ static int apparmor_socket_getpeersec_stream(struct socket *sock, - char __user *optval, - int __user *optlen, + sockptr_t optval, sockptr_t optlen, unsigned int len) { - char *name; + char *name = NULL; int slen, error = 0; struct aa_label *label; struct aa_label *peer; @@ -1091,23 +1090,21 @@ static int apparmor_socket_getpeersec_stream(struct socket *sock, /* don't include terminating \0 in slen, it breaks some apps */ if (slen < 0) { error = -ENOMEM; - } else { - if (slen > len) { - error = -ERANGE; - } else if (copy_to_user(optval, name, slen)) { - error = -EFAULT; - goto out; - } - if (put_user(slen, optlen)) - error = -EFAULT; -out: - kfree(name); - + goto done; + } + if (slen > len) { + error = -ERANGE; + goto done_len; } + if (copy_to_sockptr(optval, name, slen)) + error = -EFAULT; +done_len: + if (copy_to_sockptr(optlen, &slen, sizeof(slen))) + error = -EFAULT; done: end_current_label_crit_section(label); - + kfree(name); return error; } diff --git a/security/security.c b/security/security.c index 269c3965393f..e9dcde3c4f14 100644 --- a/security/security.c +++ b/security/security.c @@ -2224,11 +2224,11 @@ int security_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(security_sock_rcv_skb); -int security_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len) +int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, + sockptr_t 
optlen, unsigned int len) { return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + optval, optlen, len); } int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 50d3ddfe15fd..46c00a68bb4b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5110,11 +5110,12 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) return err; } -static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *optval, - int __user *optlen, unsigned len) +static int selinux_socket_getpeersec_stream(struct socket *sock, + sockptr_t optval, sockptr_t optlen, + unsigned int len) { int err = 0; - char *scontext; + char *scontext = NULL; u32 scontext_len; struct sk_security_struct *sksec = sock->sk->sk_security; u32 peer_sid = SECSID_NULL; @@ -5130,17 +5131,15 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op &scontext_len); if (err) return err; - if (scontext_len > len) { err = -ERANGE; goto out_len; } - if (copy_to_user(optval, scontext, scontext_len)) + if (copy_to_sockptr(optval, scontext, scontext_len)) err = -EFAULT; - out_len: - if (put_user(scontext_len, optlen)) + if (copy_to_sockptr(optlen, &scontext_len, sizeof(scontext_len))) err = -EFAULT; kfree(scontext); return err; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e1669759403a..5388f143eecd 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4022,12 +4022,12 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) * returns zero on success, an error code otherwise */ static int smack_socket_getpeersec_stream(struct socket *sock, - char __user *optval, - int __user *optlen, unsigned len) + sockptr_t optval, sockptr_t optlen, + unsigned int len) { struct socket_smack *ssp; char *rcp = ""; - int slen = 1; + u32 slen = 1; int rc = 0; ssp = sock->sk->sk_security; @@ -4035,15 +4035,16 @@ static int smack_socket_getpeersec_stream(struct socket *sock, rcp = ssp->smk_packet->smk_known; slen = strlen(rcp) + 1; } - - if (slen > len) + if (slen > len) { rc = -ERANGE; - else if (copy_to_user(optval, rcp, slen) != 0) - rc = -EFAULT; + goto out_len; + } - if (put_user(slen, optlen) != 0) + if (copy_to_sockptr(optval, rcp, slen)) + rc = -EFAULT; +out_len: + if (copy_to_sockptr(optlen, &slen, sizeof(slen))) rc = -EFAULT; - return rc; } From c09ffff246cab96cc35eb40b3faf849b7cfa3ae0 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 26 Jan 2024 19:45:31 +0100 Subject: [PATCH 47/71] lsm: fix default return value of the socket_getpeersec_*() hooks [ Upstream commit 5a287d3d2b9de2b3e747132c615599907ba5c3c1 ] For these hooks the true "neutral" value is -EOPNOTSUPP, which is currently what is returned when no LSM provides this hook and what LSMs return when there is no security context set on the socket. Correct the value in and adjust the dispatch functions in security/security.c to avoid issues when the BPF LSM is enabled. 
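The dispatch change matters because the hook's default value now means "no LSM answered": each registered implementation is tried in turn, and only a result different from the default ends the walk, so an LSM that merely returns the default (as the BPF LSM does when nothing meaningful is attached) can no longer be mistaken for a successful answer. A minimal userspace model of that rule, with invented names used purely for illustration:

#include <stdio.h>
#include <errno.h>

#define HOOK_DEFAULT (-ENOPROTOOPT)     /* neutral "no opinion" value */

typedef int (*hook_fn)(void);

static int stub_lsm(void)      { return HOOK_DEFAULT; } /* provides no context */
static int answering_lsm(void) { return 0; }            /* fills in the context */

/* first hook returning something other than the default wins */
static int dispatch(hook_fn *hooks, int nr)
{
        for (int i = 0; i < nr; i++) {
                int rc = hooks[i]();

                if (rc != HOOK_DEFAULT)
                        return rc;
        }
        return HOOK_DEFAULT;    /* nobody provided a security context */
}

int main(void)
{
        hook_fn hooks[] = { stub_lsm, answering_lsm };

        printf("rc = %d\n", dispatch(hooks, 2));        /* prints rc = 0 */
        return 0;
}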
Cc: stable@vger.kernel.org Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: Ondrej Mosnacek [PM: subject line tweak] Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- include/linux/lsm_hook_defs.h | 4 ++-- security/security.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 9f550eab8ebd..07abcd384975 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -293,9 +293,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) diff --git a/security/security.c b/security/security.c index e9dcde3c4f14..0bbcb100ba8e 100644 --- a/security/security.c +++ b/security/security.c @@ -2227,14 +2227,37 @@ EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { - return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, + list) { + rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, + len); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_stream); } int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, - skb, secid); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, + list) { + rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_dgram); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); From b3bca5e8c76a157861e1644d62e7635de0d01785 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 24 Apr 2023 11:38:45 +0800 Subject: [PATCH 48/71] ext4: make ext4_es_insert_extent() return void [ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ] Now ext4_es_insert_extent() never return error, so make it return void. 
Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents") Signed-off-by: Sasha Levin --- fs/ext4/extents.c | 5 +++-- fs/ext4/extents_status.c | 14 ++++++-------- fs/ext4/extents_status.h | 6 +++--- fs/ext4/inode.c | 21 ++++++--------------- 4 files changed, 18 insertions(+), 28 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 68aa8760cb46..9e1259272791 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3107,8 +3107,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) if (ee_len == 0) return 0; - return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, - EXTENT_STATUS_WRITTEN); + ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, + EXTENT_STATUS_WRITTEN); + return 0; } /* FIXME!! we need to try to merge to left or right after zero-out */ diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index cccbdfd49a86..f37e62546745 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -846,12 +846,10 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes, /* * ext4_es_insert_extent() adds information to an inode's extent * status tree. - * - * Return 0 on success, error code on failure. */ -int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status) +void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status) { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; @@ -863,13 +861,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, bool revise_pending = false; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) - return 0; + return; es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", lblk, len, pblk, status, inode->i_ino); if (!len) - return 0; + return; BUG_ON(end < lblk); @@ -938,7 +936,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, goto retry; ext4_es_print_tree(inode); - return 0; + return; } /* diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 4ec30a798260..481ec4381bee 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -127,9 +127,9 @@ extern int __init ext4_init_es(void); extern void ext4_exit_es(void); extern void ext4_es_init_tree(struct ext4_es_tree *tree); -extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status); +extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status); extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk, unsigned int status); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 365c4d3a434a..ab2a7f990288 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; - ret = ext4_es_insert_extent(inode, map->m_lblk, - map->m_len, map->m_pblk, status); - if (ret < 0) - retval = ret; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } up_read((&EXT4_I(inode)->i_data_sem)); @@ -701,12 +699,8 @@ int 
ext4_map_blocks(handle_t *handle, struct inode *inode, ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, map->m_lblk + map->m_len - 1)) status |= EXTENT_STATUS_DELAYED; - ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status); - if (ret < 0) { - retval = ret; - goto out_sem; - } + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } out_sem: @@ -1800,7 +1794,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, set_buffer_new(bh); set_buffer_delay(bh); } else if (retval > 0) { - int ret; unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1813,10 +1806,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; - ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status); - if (ret != 0) - retval = ret; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); } out_unlock: From 5b69dabd7e4adc9baf3ad5daf822a4f8b9311840 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:00 +0800 Subject: [PATCH 49/71] ext4: refactor ext4_da_map_blocks() [ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ] Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary parameters and branches, no logic changes. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Stable-dep-of: acf795dc161f ("ext4: convert to exclusive lock while inserting delalloc extents") Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ab2a7f990288..64162470a7e6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1729,7 +1729,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { if (ext4_es_is_hole(&es)) { - retval = 0; down_read(&EXT4_I(inode)->i_data_sem); goto add_delayed; } @@ -1774,26 +1773,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - -add_delayed: - if (retval == 0) { - int ret; - - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? - */ - - ret = ext4_insert_delayed_block(inode, map->m_lblk); - if (ret != 0) { - retval = ret; - goto out_unlock; - } - - map_bh(bh, inode->i_sb, invalid_block); - set_buffer_new(bh); - set_buffer_delay(bh); - } else if (retval > 0) { + if (retval < 0) + goto out_unlock; + if (retval > 0) { unsigned int status; if (unlikely(retval != map->m_len)) { @@ -1808,11 +1790,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); + goto out_unlock; } +add_delayed: + /* + * XXX: __block_prepare_write() unmaps passed block, + * is it OK? 
+ */ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + if (retval) + goto out_unlock; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); + out_unlock: up_read((&EXT4_I(inode)->i_data_sem)); - return retval; } From 58bf67d524e2c10bd11ee111c06d2a5cd7fd3b3d Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Sat, 27 Jan 2024 09:58:01 +0800 Subject: [PATCH 50/71] ext4: convert to exclusive lock while inserting delalloc extents [ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ] ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem when inserting delalloc extents, it could be raced by another querying path of ext4_map_blocks() without i_rwsem, .e.g buffered read path. Suppose we buffered read a file containing just a hole, and without any cached extents tree, then it is raced by another delayed buffered write to the same area or the near area belongs to the same hole, and the new delalloc extent could be overwritten to a hole extent. pread() pwrite() filemap_read_folio() ext4_mpage_readpages() ext4_map_blocks() down_read(i_data_sem) ext4_ext_determine_hole() //find hole ext4_ext_put_gap_in_cache() ext4_es_find_extent_range() //no delalloc extent ext4_da_map_blocks() down_read(i_data_sem) ext4_insert_delayed_block() //insert delalloc extent ext4_es_insert_extent() //overwrite delalloc extent to hole This race could lead to inconsistent delalloc extents tree and incorrect reserved space counter. Fix this by converting to hold i_data_sem in exclusive mode when adding a new delalloc extent in ext4_da_map_blocks(). Cc: stable@vger.kernel.org Signed-off-by: Zhang Yi Suggested-by: Jan Kara Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/inode.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 64162470a7e6..8b48ed351c4b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1728,10 +1728,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { - if (ext4_es_is_hole(&es)) { - down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_es_is_hole(&es)) goto add_delayed; - } /* * Delayed extent could be allocated by fallocate. @@ -1773,8 +1771,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, retval = ext4_ext_map_blocks(NULL, inode, map, 0); else retval = ext4_ind_map_blocks(NULL, inode, map, 0); - if (retval < 0) - goto out_unlock; + if (retval < 0) { + up_read(&EXT4_I(inode)->i_data_sem); + return retval; + } if (retval > 0) { unsigned int status; @@ -1790,24 +1790,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ext4_es_insert_extent(inode, map->m_lblk, map->m_len, map->m_pblk, status); - goto out_unlock; + up_read(&EXT4_I(inode)->i_data_sem); + return retval; } + up_read(&EXT4_I(inode)->i_data_sem); add_delayed: - /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? 
- */ + down_write(&EXT4_I(inode)->i_data_sem); retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); if (retval) - goto out_unlock; + return retval; map_bh(bh, inode->i_sb, invalid_block); set_buffer_new(bh); set_buffer_delay(bh); - -out_unlock: - up_read((&EXT4_I(inode)->i_data_sem)); return retval; } From 2ce36635004a6608f6d475abd22668b75a8a8977 Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:00 +0100 Subject: [PATCH 51/71] Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening [ Upstream commit e8b7db38449ac5b950a3f00519171c4be3e226ff ] Currently, VMbus drivers use pointers into guest memory as request IDs for interactions with Hyper-V. To be more robust in the face of errors or malicious behavior from a compromised Hyper-V, avoid exposing guest memory addresses to Hyper-V. Also avoid Hyper-V giving back a bad request ID that is then treated as the address of a guest data structure with no validation. Instead, encapsulate these memory addresses and provide small integers as request IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Reviewed-by: Wei Liu Link: https://lore.kernel.org/r/20201109100402.8946-2-parri.andrea@gmail.com Signed-off-by: Wei Liu Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/hv/channel.c | 174 ++++++++++++++++++++++++++++++++++++-- drivers/hv/hyperv_vmbus.h | 3 +- drivers/hv/ring_buffer.c | 29 ++++++- include/linux/hyperv.h | 22 +++++ 4 files changed, 219 insertions(+), 9 deletions(-) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index f064fa6ef181..a59ab2f3d68e 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -503,6 +503,70 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } EXPORT_SYMBOL_GPL(vmbus_establish_gpadl); +/** + * request_arr_init - Allocates memory for the requestor array. Each slot + * keeps track of the next available slot in the array. Initially, each + * slot points to the next one (as in a Linked List). The last slot + * does not point to anything, so its value is U64_MAX by default. + * @size The size of the array + */ +static u64 *request_arr_init(u32 size) +{ + int i; + u64 *req_arr; + + req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL); + if (!req_arr) + return NULL; + + for (i = 0; i < size - 1; i++) + req_arr[i] = i + 1; + + /* Last slot (no more available slots) */ + req_arr[i] = U64_MAX; + + return req_arr; +} + +/* + * vmbus_alloc_requestor - Initializes @rqstor's fields. 
+ * Index 0 is the first free slot + * @size: Size of the requestor array + */ +static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size) +{ + u64 *rqst_arr; + unsigned long *bitmap; + + rqst_arr = request_arr_init(size); + if (!rqst_arr) + return -ENOMEM; + + bitmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bitmap) { + kfree(rqst_arr); + return -ENOMEM; + } + + rqstor->req_arr = rqst_arr; + rqstor->req_bitmap = bitmap; + rqstor->size = size; + rqstor->next_request_id = 0; + spin_lock_init(&rqstor->req_lock); + + return 0; +} + +/* + * vmbus_free_requestor - Frees memory allocated for @rqstor + * @rqstor: Pointer to the requestor struct + */ +static void vmbus_free_requestor(struct vmbus_requestor *rqstor) +{ + kfree(rqstor->req_arr); + bitmap_free(rqstor->req_bitmap); +} + static int __vmbus_open(struct vmbus_channel *newchannel, void *userdata, u32 userdatalen, void (*onchannelcallback)(void *context), void *context) @@ -523,6 +587,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel, if (newchannel->state != CHANNEL_OPEN_STATE) return -EINVAL; + /* Create and init requestor */ + if (newchannel->rqstor_size) { + if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size)) + return -ENOMEM; + } + newchannel->state = CHANNEL_OPENING_STATE; newchannel->onchannel_callback = onchannelcallback; newchannel->channel_callback_context = context; @@ -626,6 +696,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel, error_clean_ring: hv_ringbuffer_cleanup(&newchannel->outbound); hv_ringbuffer_cleanup(&newchannel->inbound); + vmbus_free_requestor(&newchannel->requestor); newchannel->state = CHANNEL_OPEN_STATE; return err; } @@ -808,6 +879,9 @@ static int vmbus_close_internal(struct vmbus_channel *channel) channel->ringbuffer_gpadlhandle = 0; } + if (!ret) + vmbus_free_requestor(&channel->requestor); + return ret; } @@ -888,7 +962,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, /* in 8-bytes granularity */ desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3; desc.len8 = (u16)(packetlen_aligned >> 3); - desc.trans_id = requestid; + desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ bufferlist[0].iov_base = &desc; bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor); @@ -897,7 +971,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, num_vecs); + return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid); } EXPORT_SYMBOL(vmbus_sendpacket); @@ -939,7 +1013,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = requestid; + desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc.reserved = 0; desc.rangecount = pagecount; @@ -956,7 +1030,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, 3); + return hv_ringbuffer_write(channel, bufferlist, 3, requestid); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); @@ -983,7 +1057,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->flags = 
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */ desc->length8 = (u16)(packetlen_aligned >> 3); - desc->transactionid = requestid; + desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; desc->rangecount = 1; @@ -994,7 +1068,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, 3); + return hv_ringbuffer_write(channel, bufferlist, 3, requestid); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); @@ -1042,3 +1116,91 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, buffer_actual_len, requestid, true); } EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); + +/* + * vmbus_next_request_id - Returns a new request id. It is also + * the index at which the guest memory address is stored. + * Uses a spin lock to avoid race conditions. + * @rqstor: Pointer to the requestor struct + * @rqst_add: Guest memory address to be stored in the array + */ +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr) +{ + unsigned long flags; + u64 current_id; + const struct vmbus_channel *channel = + container_of(rqstor, const struct vmbus_channel, requestor); + + /* Check rqstor has been initialized */ + if (!channel->rqstor_size) + return VMBUS_NO_RQSTOR; + + spin_lock_irqsave(&rqstor->req_lock, flags); + current_id = rqstor->next_request_id; + + /* Requestor array is full */ + if (current_id >= rqstor->size) { + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return VMBUS_RQST_ERROR; + } + + rqstor->next_request_id = rqstor->req_arr[current_id]; + rqstor->req_arr[current_id] = rqst_addr; + + /* The already held spin lock provides atomicity */ + bitmap_set(rqstor->req_bitmap, current_id, 1); + + spin_unlock_irqrestore(&rqstor->req_lock, flags); + + /* + * Cannot return an ID of 0, which is reserved for an unsolicited + * message from Hyper-V. + */ + return current_id + 1; +} +EXPORT_SYMBOL_GPL(vmbus_next_request_id); + +/* + * vmbus_request_addr - Returns the memory address stored at @trans_id + * in @rqstor. Uses a spin lock to avoid race conditions. + * @rqstor: Pointer to the requestor struct + * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's + * next request id. 
+ */ +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id) +{ + unsigned long flags; + u64 req_addr; + const struct vmbus_channel *channel = + container_of(rqstor, const struct vmbus_channel, requestor); + + /* Check rqstor has been initialized */ + if (!channel->rqstor_size) + return VMBUS_NO_RQSTOR; + + /* Hyper-V can send an unsolicited message with ID of 0 */ + if (!trans_id) + return trans_id; + + spin_lock_irqsave(&rqstor->req_lock, flags); + + /* Data corresponding to trans_id is stored at trans_id - 1 */ + trans_id--; + + /* Invalid trans_id */ + if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) { + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return VMBUS_RQST_ERROR; + } + + req_addr = rqstor->req_arr[trans_id]; + rqstor->req_arr[trans_id] = rqstor->next_request_id; + rqstor->next_request_id = trans_id; + + /* The already held spin lock provides atomicity */ + bitmap_clear(rqstor->req_bitmap, trans_id, 1); + + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return req_addr; +} +EXPORT_SYMBOL_GPL(vmbus_request_addr); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 7845fa5de79e..601660bca5d4 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -180,7 +180,8 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); int hv_ringbuffer_write(struct vmbus_channel *channel, - const struct kvec *kv_list, u32 kv_count); + const struct kvec *kv_list, u32 kv_count, + u64 requestid); int hv_ringbuffer_read(struct vmbus_channel *channel, void *buffer, u32 buflen, u32 *buffer_actual_len, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 7ed6fad3fa8f..a0ba6ac48736 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -261,7 +261,8 @@ EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy); /* Write to the ring buffer. */ int hv_ringbuffer_write(struct vmbus_channel *channel, - const struct kvec *kv_list, u32 kv_count) + const struct kvec *kv_list, u32 kv_count, + u64 requestid) { int i; u32 bytes_avail_towrite; @@ -271,6 +272,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, u64 prev_indices; unsigned long flags; struct hv_ring_buffer_info *outring_info = &channel->outbound; + struct vmpacket_descriptor *desc = kv_list[0].iov_base; + u64 rqst_id = VMBUS_NO_RQSTOR; if (channel->rescind) return -ENODEV; @@ -313,6 +316,23 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, kv_list[i].iov_len); } + /* + * Allocate the request ID after the data has been copied into the + * ring buffer. Once this request ID is allocated, the completion + * path could find the data and free it. + */ + + if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) { + rqst_id = vmbus_next_request_id(&channel->requestor, requestid); + if (rqst_id == VMBUS_RQST_ERROR) { + spin_unlock_irqrestore(&outring_info->ring_lock, flags); + pr_err("No request id available\n"); + return -EAGAIN; + } + } + desc = hv_get_ring_buffer(outring_info) + old_write; + desc->trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? 
requestid : rqst_id; + /* Set previous packet start */ prev_indices = hv_get_ring_bufferindices(outring_info); @@ -332,8 +352,13 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, hv_signal_on_write(old_write, channel); - if (channel->rescind) + if (channel->rescind) { + if (rqst_id != VMBUS_NO_RQSTOR) { + /* Reclaim request ID to avoid leak of IDs */ + vmbus_request_addr(&channel->requestor, rqst_id); + } return -ENODEV; + } return 0; } diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index eada4d8d6587..4cb65a79d92f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -764,6 +764,22 @@ enum vmbus_device_type { HV_UNKNOWN, }; +/* + * Provides request ids for VMBus. Encapsulates guest memory + * addresses and stores the next available slot in req_arr + * to generate new ids in constant time. + */ +struct vmbus_requestor { + u64 *req_arr; + unsigned long *req_bitmap; /* is a given slot available? */ + u32 size; + u64 next_request_id; + spinlock_t req_lock; /* provides atomicity */ +}; + +#define VMBUS_NO_RQSTOR U64_MAX +#define VMBUS_RQST_ERROR (U64_MAX - 1) + struct vmbus_device { u16 dev_type; guid_t guid; @@ -988,8 +1004,14 @@ struct vmbus_channel { u32 fuzz_testing_interrupt_delay; u32 fuzz_testing_message_delay; + /* request/transaction ids for VMBus */ + struct vmbus_requestor requestor; + u32 rqstor_size; }; +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id); + static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & From cdba035680505d069c937068386b7832de3c1f90 Mon Sep 17 00:00:00 2001 From: Andres Beltran Date: Mon, 9 Nov 2020 11:04:02 +0100 Subject: [PATCH 52/71] hv_netvsc: Use vmbus_requestor to generate transaction IDs for VMBus hardening [ Upstream commit 4d18fcc95f50950a99bd940d4e61a983f91d267a ] Currently, pointers to guest memory are passed to Hyper-V as transaction IDs in netvsc. In the face of errors or malicious behavior in Hyper-V, netvsc should not expose or trust the transaction IDs returned by Hyper-V to be valid guest memory addresses. Instead, use small integers generated by vmbus_requestor as requests (transaction) IDs. Signed-off-by: Andres Beltran Co-developed-by: Andrea Parri (Microsoft) Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Acked-by: Jakub Kicinski Reviewed-by: Wei Liu Cc: "David S. 
Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20201109100402.8946-4-parri.andrea@gmail.com Signed-off-by: Wei Liu Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/net/hyperv/hyperv_net.h | 13 +++++++++++++ drivers/net/hyperv/netvsc.c | 22 ++++++++++++++++------ drivers/net/hyperv/rndis_filter.c | 1 + include/linux/hyperv.h | 1 + 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 367878493e70..15652d7951f9 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -847,6 +847,19 @@ struct nvsp_message { #define NETVSC_XDP_HDRM 256 +#define NETVSC_MIN_OUT_MSG_SIZE (sizeof(struct vmpacket_descriptor) + \ + sizeof(struct nvsp_message)) +#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) + +/* Estimated requestor size: + * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size + */ +static inline u32 netvsc_rqstor_size(unsigned long ringbytes) +{ + return ringbytes / NETVSC_MIN_OUT_MSG_SIZE + + ringbytes / NETVSC_MIN_IN_MSG_SIZE; +} + #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ (offsetof(struct vmtransfer_page_packet_header, ranges) + \ (rng_cnt) * sizeof(struct vmtransfer_page_range)) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 3eae31c0f97a..c9b73a044881 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -50,7 +50,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message), - (unsigned long)init_pkt, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); } @@ -163,7 +163,7 @@ static void netvsc_revoke_recv_buf(struct hv_device *device, ret = vmbus_sendpacket(device->channel, revoke_packet, sizeof(struct nvsp_message), - (unsigned long)revoke_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); /* If the failure is because the channel is rescinded; * ignore the failure since we cannot send on a rescinded @@ -213,7 +213,7 @@ static void netvsc_revoke_send_buf(struct hv_device *device, ret = vmbus_sendpacket(device->channel, revoke_packet, sizeof(struct nvsp_message), - (unsigned long)revoke_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); /* If the failure is because the channel is rescinded; @@ -557,7 +557,7 @@ static int negotiate_nvsp_ver(struct hv_device *device, ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), - (unsigned long)init_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); return ret; @@ -614,7 +614,7 @@ static int netvsc_connect_vsp(struct hv_device *device, /* Send the init request */ ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), - (unsigned long)init_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); if (ret != 0) goto cleanup; @@ -698,10 +698,19 @@ static void netvsc_send_tx_complete(struct net_device *ndev, const struct vmpacket_descriptor *desc, int budget) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct sk_buff *skb; u16 q_idx = 0; int queue_sends; + u64 cmd_rqst; + + cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + netdev_err(ndev, "Incorrect transaction id\n"); + return; + } + + skb = (struct sk_buff *)(unsigned 
long)cmd_rqst; /* Notify the layer above us */ if (likely(skb)) { @@ -1530,6 +1539,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netvsc_poll, NAPI_POLL_WEIGHT); /* Open the channel */ + device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); ret = vmbus_open(device->channel, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, net_device->chan_table); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 90bc0008fa2f..13f62950eeb9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1170,6 +1170,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) /* Set the channel before opening.*/ nvchan->channel = new_sc; + new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 4cb65a79d92f..2aaf450c8d80 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -779,6 +779,7 @@ struct vmbus_requestor { #define VMBUS_NO_RQSTOR U64_MAX #define VMBUS_RQST_ERROR (U64_MAX - 1) +#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2) struct vmbus_device { u16 dev_type; From 0db98ee09b9ca271548de2379f1091d6f37b3b60 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 8 Jan 2021 16:53:42 -0800 Subject: [PATCH 53/71] hv_netvsc: Wait for completion on request SWITCH_DATA_PATH [ Upstream commit 8b31f8c982b738e4130539e47f03967c599d8e22 ] The completion indicates if NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been processed by the VSP. The traffic is steered to VF or synthetic after we receive this completion. Signed-off-by: Long Li Reported-by: kernel test robot Signed-off-by: Jakub Kicinski Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++++++++++++++--- drivers/net/hyperv/netvsc_drv.c | 1 - 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index c9b73a044881..03333a4136bf 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; + /* Block sending traffic to VF if it's about to be gone */ + if (!vf) + net_device_ctx->data_path_is_vf = vf; + memset(init_pkt, 0, sizeof(struct nvsp_message)); init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; if (vf) @@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message), - VMBUS_RQST_ID_NO_RESPONSE, - VM_PKT_DATA_INBAND, 0); + (unsigned long)init_pkt, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + wait_for_completion(&nv_dev->channel_init_wait); + net_device_ctx->data_path_is_vf = vf; } /* Worker to setup sub channels on initial setup @@ -757,8 +764,31 @@ static void netvsc_send_completion(struct net_device *ndev, const struct vmpacket_descriptor *desc, int budget) { - const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); + const struct nvsp_message *nvsp_packet; u32 msglen = hv_pkt_datalen(desc); + struct nvsp_message *pkt_rqst; + u64 cmd_rqst; + + /* First check if this is a VMBUS completion without data payload */ + if (!msglen) 
{ + cmd_rqst = vmbus_request_addr(&incoming_channel->requestor, + (u64)desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + netdev_err(ndev, "Invalid transaction id\n"); + return; + } + + pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst; + switch (pkt_rqst->hdr.msg_type) { + case NVSP_MSG4_TYPE_SWITCH_DATA_PATH: + complete(&net_device->channel_init_wait); + break; + + default: + netdev_err(ndev, "Unexpected VMBUS completion!!\n"); + } + return; + } /* Ensure packet is big enough to read header fields */ if (msglen < sizeof(struct nvsp_message_header)) { @@ -766,6 +796,7 @@ static void netvsc_send_completion(struct net_device *ndev, return; } + nvsp_packet = hv_pkt_data(desc); switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 790bf750281a..57a5ec098e7e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2429,7 +2429,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) if (net_device_ctx->data_path_is_vf == vf_is_up) return NOTIFY_OK; - net_device_ctx->data_path_is_vf = vf_is_up; if (vf_is_up && !net_device_ctx->vf_alloc) { netdev_info(ndev, "Waiting for the VF association from host\n"); From 3cfee5668b3530c8ef087e141a639f721bd4d231 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 8 Jan 2021 16:53:43 -0800 Subject: [PATCH 54/71] hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove [ Upstream commit 34b06a2eee44d469f2e2c013a83e6dac3aff6411 ] On VF hot remove, NETDEV_GOING_DOWN is sent to notify the VF is about to go down. At this time, the VF is still sending/receiving traffic and we request the VSP to switch datapath. On completion, the datapath is switched to synthetic and we can proceed with VF hot remove. Signed-off-by: Long Li Reviewed-by: Haiyang Zhang Signed-off-by: Jakub Kicinski Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 57a5ec098e7e..057b1a9dde15 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2411,12 +2411,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network * interface, there is only the CHANGE event and no UP or DOWN event. 
*/ -static int netvsc_vf_changed(struct net_device *vf_netdev) +static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; struct net_device *ndev; - bool vf_is_up = netif_running(vf_netdev); + bool vf_is_up = false; + + if (event != NETDEV_GOING_DOWN) + vf_is_up = netif_running(vf_netdev); ndev = get_netvsc_byref(vf_netdev); if (!ndev) @@ -2762,7 +2765,8 @@ static int netvsc_netdev_event(struct notifier_block *this, case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: - return netvsc_vf_changed(event_dev); + case NETDEV_GOING_DOWN: + return netvsc_vf_changed(event_dev, event); default: return NOTIFY_DONE; } From 0d54d2240da7ecd5a8e05bb1caef1a57217a7cf6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 23 Apr 2021 18:12:35 -0700 Subject: [PATCH 55/71] hv_netvsc: Make netvsc/VF binding check both MAC and serial number [ Upstream commit 64ff412ad41fe3a5bf759ff4844dc1382176485c ] Currently the netvsc/VF binding logic only checks the PCI serial number. The Microsoft Azure Network Adapter (MANA) supports multiple net_device interfaces (each such interface is called a "vPort", and has its unique MAC address) which are backed by the same VF PCI device, so the binding logic should check both the MAC address and the PCI serial number. The change should not break any other existing VF drivers, because Hyper-V NIC SR-IOV implementation requires the netvsc network interface and the VF network interface have the same MAC address. Co-developed-by: Haiyang Zhang Signed-off-by: Haiyang Zhang Co-developed-by: Shachar Raindel Signed-off-by: Shachar Raindel Acked-by: Stephen Hemminger Signed-off-by: Dexuan Cui Signed-off-by: David S. Miller Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 057b1a9dde15..9ec1633b89b4 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2310,8 +2310,17 @@ static struct net_device *get_netvsc_byslot(const struct net_device *vf_netdev) if (!ndev_ctx->vf_alloc) continue; - if (ndev_ctx->vf_serial == serial) - return hv_get_drvdata(ndev_ctx->device_ctx); + if (ndev_ctx->vf_serial != serial) + continue; + + ndev = hv_get_drvdata(ndev_ctx->device_ctx); + if (ndev->addr_len != vf_netdev->addr_len || + memcmp(ndev->perm_addr, vf_netdev->perm_addr, + ndev->addr_len) != 0) + continue; + + return ndev; + } /* Fallback path to check synthetic vf with help of mac addr. From 8f41b33d240ed2b21234c193ccc5aa4b37a8d3be Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Sun, 10 Oct 2021 13:03:28 +0900 Subject: [PATCH 56/71] hv_netvsc: use netif_is_bond_master() instead of open code [ Upstream commit c60882a4566a0a62dc3a40c85131103aad83dcb3 ] Use netif_is_bond_master() function instead of open code, which is ((event_dev->priv_flags & IFF_BONDING) && (event_dev->flags & IFF_MASTER)). This patch doesn't change logic. Signed-off-by: Juhee Kang Signed-off-by: David S. 
Miller Stable-dep-of: 9cae43da9867 ("hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed") Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 9ec1633b89b4..e8efcc6a0b05 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2760,8 +2760,7 @@ static int netvsc_netdev_event(struct notifier_block *this, return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ - if ((event_dev->priv_flags & IFF_BONDING) && - (event_dev->flags & IFF_MASTER)) + if (netif_is_bond_master(event_dev)) return NOTIFY_DONE; switch (event) { From 5b10a88f64c0315cfdef45de0aaaa4eef57de0b7 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Thu, 1 Feb 2024 20:40:38 -0800 Subject: [PATCH 57/71] hv_netvsc: Register VF in netvsc_probe if NET_DEVICE_REGISTER missed [ Upstream commit 9cae43da9867412f8bd09aee5c8a8dc5e8dc3dc2 ] If hv_netvsc driver is unloaded and reloaded, the NET_DEVICE_REGISTER handler cannot perform VF register successfully as the register call is received before netvsc_probe is finished. This is because we register register_netdevice_notifier() very early( even before vmbus_driver_register()). To fix this, we try to register each such matching VF( if it is visible as a netdevice) at the end of netvsc_probe. Cc: stable@vger.kernel.org Fixes: 85520856466e ("hv_netvsc: Fix race of register_netdevice_notifier and VF register") Suggested-by: Dexuan Cui Signed-off-by: Shradha Gupta Reviewed-by: Haiyang Zhang Reviewed-by: Dexuan Cui Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hyperv/netvsc_drv.c | 82 +++++++++++++++++++++++++-------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e8efcc6a0b05..0fc0f9cb3f34 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -44,6 +44,10 @@ #define LINKCHANGE_INT (2 * HZ) #define VF_TAKEOVER_INT (HZ / 10) +/* Macros to define the context of vf registration */ +#define VF_REG_IN_PROBE 1 +#define VF_REG_IN_NOTIFIER 2 + static unsigned int ring_size __ro_after_init = 128; module_param(ring_size, uint, 0444); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@ -2194,7 +2198,7 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) } static int netvsc_vf_join(struct net_device *vf_netdev, - struct net_device *ndev) + struct net_device *ndev, int context) { struct net_device_context *ndev_ctx = netdev_priv(ndev); int ret; @@ -2217,7 +2221,11 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto upper_link_failed; } - schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); + /* If this registration is called from probe context vf_takeover + * is taken care of later in probe itself. 
+ */ + if (context == VF_REG_IN_NOTIFIER) + schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT); call_netdevice_notifiers(NETDEV_JOIN, vf_netdev); @@ -2355,7 +2363,7 @@ static int netvsc_prepare_bonding(struct net_device *vf_netdev) return NOTIFY_DONE; } -static int netvsc_register_vf(struct net_device *vf_netdev) +static int netvsc_register_vf(struct net_device *vf_netdev, int context) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; @@ -2395,7 +2403,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); - if (netvsc_vf_join(vf_netdev, ndev) != 0) + if (netvsc_vf_join(vf_netdev, ndev, context) != 0) return NOTIFY_DONE; dev_hold(vf_netdev); @@ -2479,10 +2487,31 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static int check_dev_is_matching_vf(struct net_device *event_ndev) +{ + /* Skip NetVSC interfaces */ + if (event_ndev->netdev_ops == &device_ops) + return -ENODEV; + + /* Avoid non-Ethernet type devices */ + if (event_ndev->type != ARPHRD_ETHER) + return -ENODEV; + + /* Avoid Vlan dev with same MAC registering as VF */ + if (is_vlan_dev(event_ndev)) + return -ENODEV; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (netif_is_bond_master(event_ndev)) + return -ENODEV; + + return 0; +} + static int netvsc_probe(struct hv_device *dev, const struct hv_vmbus_device_id *dev_id) { - struct net_device *net = NULL; + struct net_device *net = NULL, *vf_netdev; struct net_device_context *net_device_ctx; struct netvsc_device_info *device_info = NULL; struct netvsc_device *nvdev; @@ -2590,6 +2619,30 @@ static int netvsc_probe(struct hv_device *dev, } list_add(&net_device_ctx->list, &netvsc_dev_list); + + /* When the hv_netvsc driver is unloaded and reloaded, the + * NET_DEVICE_REGISTER for the vf device is replayed before probe + * is complete. This is because register_netdevice_notifier() gets + * registered before vmbus_driver_register() so that callback func + * is set before probe and we don't miss events like NETDEV_POST_INIT + * So, in this section we try to register the matching vf device that + * is present as a netdevice, knowing that its register call is not + * processed in the netvsc_netdev_notifier(as probing is progress and + * get_netvsc_byslot fails). 
+ */ + for_each_netdev(dev_net(net), vf_netdev) { + ret = check_dev_is_matching_vf(vf_netdev); + if (ret != 0) + continue; + + if (net != get_netvsc_byslot(vf_netdev)) + continue; + + netvsc_prepare_bonding(vf_netdev); + netvsc_register_vf(vf_netdev, VF_REG_IN_PROBE); + __netvsc_vf_setup(net, vf_netdev); + break; + } rtnl_unlock(); netvsc_devinfo_put(device_info); @@ -2746,28 +2799,17 @@ static int netvsc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + int ret = 0; - /* Skip our own events */ - if (event_dev->netdev_ops == &device_ops) - return NOTIFY_DONE; - - /* Avoid non-Ethernet type devices */ - if (event_dev->type != ARPHRD_ETHER) - return NOTIFY_DONE; - - /* Avoid Vlan dev with same MAC registering as VF */ - if (is_vlan_dev(event_dev)) - return NOTIFY_DONE; - - /* Avoid Bonding master dev with same MAC registering as VF */ - if (netif_is_bond_master(event_dev)) + ret = check_dev_is_matching_vf(event_dev); + if (ret != 0) return NOTIFY_DONE; switch (event) { case NETDEV_POST_INIT: return netvsc_prepare_bonding(event_dev); case NETDEV_REGISTER: - return netvsc_register_vf(event_dev); + return netvsc_register_vf(event_dev, VF_REG_IN_NOTIFIER); case NETDEV_UNREGISTER: return netvsc_unregister_vf(event_dev); case NETDEV_UP: From 05edf43452c01f2f91f7589aa8bff8d5d0ae13a5 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 24 Feb 2021 12:09:54 -0800 Subject: [PATCH 58/71] mm/hugetlb: change hugetlb_reserve_pages() to type bool [ Upstream commit 33b8f84a4ee78491a8f4f9e4c5520c9da4a10983 ] While reviewing a bug in hugetlb_reserve_pages, it was noticed that all callers ignore the return value. Any failure is considered an ENOMEM error by the callers. Change the function to be of type bool. The function will return true if the reservation was successful, false otherwise. Callers currently assume a zero return code indicates success. Change the callers to look for true to indicate success. No functional change, only code cleanup. 
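Because every call site collapses a reservation failure into -ENOMEM, the bool return simply makes that contract explicit. A hedged sketch of the caller-side pattern follows; the function names are placeholders, not the hugetlbfs code itself.

#include <stdbool.h>
#include <errno.h>
#include <stdio.h>

/* placeholder for hugetlb_reserve_pages(): true on success, false otherwise */
static bool reserve_pages(long from, long to)
{
        return to > from;       /* pretend only a non-empty range succeeds */
}

static int mmap_like_caller(long from, long to)
{
        int ret = -ENOMEM;      /* the only error callers ever report */

        if (!reserve_pages(from, to))
                goto out;

        ret = 0;
out:
        return ret;
}

int main(void)
{
        printf("%d %d\n", mmap_like_caller(0, 4), mmap_like_caller(4, 4));
        return 0;
}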
Link: https://lkml.kernel.org/r/20201221192542.15732-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Reviewed-by: Matthew Wilcox (Oracle) Cc: David Hildenbrand Cc: Dan Carpenter Cc: Michal Hocko Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Stable-dep-of: e656c7a9e596 ("mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE") Signed-off-by: Sasha Levin --- fs/hugetlbfs/inode.c | 4 ++-- include/linux/hugetlb.h | 2 +- mm/hugetlb.c | 37 ++++++++++++++----------------------- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a0edd4b8fa18..c3e9fa7ce75f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -176,7 +176,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; - if (hugetlb_reserve_pages(inode, + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, vma->vm_flags)) @@ -1500,7 +1500,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, inode->i_size = size; clear_nlink(inode); - if (hugetlb_reserve_pages(inode, 0, + if (!hugetlb_reserve_pages(inode, 0, size >> huge_page_shift(hstate_inode(inode)), NULL, acctflag)) file = ERR_PTR(-ENOMEM); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 99b73fc4a824..90c66b9458c3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -140,7 +140,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); -int hugetlb_reserve_pages(struct inode *inode, long from, long to, +bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 81949f6d29af..02b7c8f9b0e8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5108,12 +5108,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, return pages << h->order; } -int hugetlb_reserve_pages(struct inode *inode, +/* Return true if reservation was successful, false otherwise. */ +bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags) { - long ret, chg, add = -1; + long chg, add = -1; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); struct resv_map *resv_map; @@ -5123,7 +5124,7 @@ int hugetlb_reserve_pages(struct inode *inode, /* This should never happen */ if (from > to) { VM_WARN(1, "%s called with a negative range\n", __func__); - return -EINVAL; + return false; } /* @@ -5132,7 +5133,7 @@ int hugetlb_reserve_pages(struct inode *inode, * without using reserves */ if (vm_flags & VM_NORESERVE) - return 0; + return true; /* * Shared mappings base their reservation on the number of pages that @@ -5154,7 +5155,7 @@ int hugetlb_reserve_pages(struct inode *inode, /* Private mapping. 
*/ resv_map = resv_map_alloc(); if (!resv_map) - return -ENOMEM; + return false; chg = to - from; @@ -5162,18 +5163,12 @@ int hugetlb_reserve_pages(struct inode *inode, set_vma_resv_flags(vma, HPAGE_RESV_OWNER); } - if (chg < 0) { - ret = chg; + if (chg < 0) goto out_err; - } - ret = hugetlb_cgroup_charge_cgroup_rsvd( - hstate_index(h), chg * pages_per_huge_page(h), &h_cg); - - if (ret < 0) { - ret = -ENOMEM; + if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h), + chg * pages_per_huge_page(h), &h_cg) < 0) goto out_err; - } if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) { /* For private mappings, the hugetlb_cgroup uncharge info hangs @@ -5188,19 +5183,15 @@ int hugetlb_reserve_pages(struct inode *inode, * reservations already in place (gbl_reserve). */ gbl_reserve = hugepage_subpool_get_pages(spool, chg); - if (gbl_reserve < 0) { - ret = -ENOSPC; + if (gbl_reserve < 0) goto out_uncharge_cgroup; - } /* * Check enough hugepages are available for the reservation. * Hand the pages back to the subpool if there are not */ - ret = hugetlb_acct_memory(h, gbl_reserve); - if (ret < 0) { + if (hugetlb_acct_memory(h, gbl_reserve) < 0) goto out_put_pages; - } /* * Account for the reservations made. Shared mappings record regions @@ -5218,7 +5209,6 @@ int hugetlb_reserve_pages(struct inode *inode, if (unlikely(add < 0)) { hugetlb_acct_memory(h, -gbl_reserve); - ret = add; goto out_put_pages; } else if (unlikely(chg > add)) { /* @@ -5251,7 +5241,8 @@ int hugetlb_reserve_pages(struct inode *inode, hugetlb_cgroup_put_rsvd_cgroup(h_cg); } } - return 0; + return true; + out_put_pages: /* put back original number of pages, chg */ (void)hugepage_subpool_put_pages(spool, chg); @@ -5267,7 +5258,7 @@ int hugetlb_reserve_pages(struct inode *inode, region_abort(resv_map, from, to, regions_needed); if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) kref_put(&resv_map->refs, resv_map_release); - return ret; + return false; } long hugetlb_unreserve_pages(struct inode *inode, long start, long end, From 14136bed4104392daef53d52dd7abbc1c8e04ddf Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Tue, 23 Jan 2024 12:04:42 -0800 Subject: [PATCH 59/71] mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE [ Upstream commit e656c7a9e59607d1672d85ffa9a89031876ffe67 ] For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages are not reserved. However when the shared memory is attached with the shmat() call the hugetlb pages are getting reserved incorrectly for SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug. ------------------------------- Following test shows the issue. $cat shmhtb.c int main() { int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE; int shmid; shmid = shmget(SKEY, SHMSZ, shmflags); if (shmid < 0) { printf("shmat: shmget() failed, %d\n", errno); return 1; } printf("After shmget()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmat(shmid, NULL, 0); printf("\nAfter shmat()\n"); system("cat /proc/meminfo | grep -i hugepages_"); shmctl(shmid, IPC_RMID, NULL); return 0; } #sysctl -w vm.nr_hugepages=20 #./shmhtb After shmget() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 0 HugePages_Surp: 0 After shmat() HugePages_Total: 20 HugePages_Free: 20 HugePages_Rsvd: 5 <-- HugePages_Surp: 0 -------------------------------- Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared memory in the shmat() call. 
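For reference, the reproducer quoted above compiles as-is once the usual headers and the two constants it leaves undefined are filled in. The values chosen for SKEY and SHMSZ below are assumptions for illustration only (SHMSZ should be a multiple of the huge page size); they are not part of the original report:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    #define SKEY  0x1234                     /* assumed IPC key */
    #define SHMSZ (10UL * 2 * 1024 * 1024)   /* assumed: 10 x 2MB huge pages */

    int main(void)
    {
            int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE;
            int shmid;

            shmid = shmget(SKEY, SHMSZ, shmflags);
            if (shmid < 0) {
                    printf("shmat: shmget() failed, %d\n", errno);
                    return 1;
            }
            printf("After shmget()\n");
            system("cat /proc/meminfo | grep -i hugepages_");

            shmat(shmid, NULL, 0);          /* should not reserve huge pages */
            printf("\nAfter shmat()\n");
            system("cat /proc/meminfo | grep -i hugepages_");

            shmctl(shmid, IPC_RMID, NULL);
            return 0;
    }

With the fix applied, HugePages_Rsvd stays at 0 after the shmat() call.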
Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- fs/hugetlbfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c3e9fa7ce75f..bf3cda498962 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -135,6 +135,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -176,10 +177,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; From 811415fe768f289e40818e5ddd4563d9c8923a7c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:25:54 +0200 Subject: [PATCH 60/71] getrusage: add the "signal_struct *sig" local variable [ Upstream commit c7ac8231ace9b07306d0299969e42073b189c70a ] No functional changes, cleanup/preparation. Link: https://lkml.kernel.org/r/20230909172554.GA20441@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: daa694e41375 ("getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index bff14910b926..8a53d858d737 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1737,6 +1737,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long flags; u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; + struct signal_struct *sig = p->signal; memset((char *)r, 0, sizeof (*r)); utime = stime = 0; @@ -1744,7 +1745,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); - maxrss = p->signal->maxrss; + maxrss = sig->maxrss; goto out; } @@ -1754,15 +1755,15 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: - utime = p->signal->cutime; - stime = p->signal->cstime; - r->ru_nvcsw = p->signal->cnvcsw; - r->ru_nivcsw = p->signal->cnivcsw; - r->ru_minflt = p->signal->cmin_flt; - r->ru_majflt = p->signal->cmaj_flt; - r->ru_inblock = p->signal->cinblock; - r->ru_oublock = p->signal->coublock; - maxrss = p->signal->cmaxrss; + utime = sig->cutime; + stime = sig->cstime; + r->ru_nvcsw = sig->cnvcsw; + r->ru_nivcsw = sig->cnivcsw; + r->ru_minflt = sig->cmin_flt; + r->ru_majflt = sig->cmaj_flt; + r->ru_inblock = sig->cinblock; + r->ru_oublock = sig->coublock; + maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; @@ -1772,14 +1773,14 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) thread_group_cputime_adjusted(p, &tgutime, &tgstime); utime += tgutime; stime += tgstime; - 
r->ru_nvcsw += p->signal->nvcsw; - r->ru_nivcsw += p->signal->nivcsw; - r->ru_minflt += p->signal->min_flt; - r->ru_majflt += p->signal->maj_flt; - r->ru_inblock += p->signal->inblock; - r->ru_oublock += p->signal->oublock; - if (maxrss < p->signal->maxrss) - maxrss = p->signal->maxrss; + r->ru_nvcsw += sig->nvcsw; + r->ru_nivcsw += sig->nivcsw; + r->ru_minflt += sig->min_flt; + r->ru_majflt += sig->maj_flt; + r->ru_inblock += sig->inblock; + r->ru_oublock += sig->oublock; + if (maxrss < sig->maxrss) + maxrss = sig->maxrss; t = p; do { accumulate_thread_rusage(t, r); From 21677f35e10488237faa08cdac298ee8edc4bc8d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:50 +0100 Subject: [PATCH 61/71] getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand() [ Upstream commit daa694e4137571b4ebec330f9a9b4d54aa8b8089 ] Patch series "getrusage: use sig->stats_lock", v2. This patch (of 2): thread_group_cputime() does its own locking, we can safely shift thread_group_cputime_adjusted() which does another for_each_thread loop outside of ->siglock protected section. This is also preparation for the next patch which changes getrusage() to use stats_lock instead of siglock, thread_group_cputime() takes the same lock. With the current implementation recursive read_seqbegin_or_lock() is fine, thread_group_cputime() can't enter the slow mode if the caller holds stats_lock, yet this looks more safe and better performance-wise. Link: https://lkml.kernel.org/r/20240122155023.GA26169@redhat.com Link: https://lkml.kernel.org/r/20240122155050.GA26205@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 8a53d858d737..26c8783bd075 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1736,17 +1736,19 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; - unsigned long maxrss = 0; + unsigned long maxrss; + struct mm_struct *mm; struct signal_struct *sig = p->signal; - memset((char *)r, 0, sizeof (*r)); + memset(r, 0, sizeof(*r)); utime = stime = 0; + maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = sig->maxrss; - goto out; + goto out_thread; } if (!lock_task_sighand(p, &flags)) @@ -1770,9 +1772,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) fallthrough; case RUSAGE_SELF: - thread_group_cputime_adjusted(p, &tgutime, &tgstime); - utime += tgutime; - stime += tgstime; r->ru_nvcsw += sig->nvcsw; r->ru_nivcsw += sig->nivcsw; r->ru_minflt += sig->min_flt; @@ -1792,19 +1791,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) } unlock_task_sighand(p, &flags); -out: + if (who == RUSAGE_CHILDREN) + goto out_children; + + thread_group_cputime_adjusted(p, &tgutime, &tgstime); + utime += tgutime; + stime += tgstime; + +out_thread: + mm = get_task_mm(p); + if (mm) { + setmax_mm_hiwater_rss(&maxrss, mm); + mmput(mm); + } + +out_children: + r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ r->ru_utime = ns_to_kernel_old_timeval(utime); r->ru_stime = ns_to_kernel_old_timeval(stime); - - if (who != RUSAGE_CHILDREN) { - struct mm_struct *mm = get_task_mm(p); - - if (mm) { - 
setmax_mm_hiwater_rss(&maxrss, mm); - mmput(mm); - } - } - r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) From 9ca97868205a97da136445ab770628e90ad02be2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 9 Sep 2023 19:26:29 +0200 Subject: [PATCH 62/71] getrusage: use __for_each_thread() [ Upstream commit 13b7bc60b5353371460a203df6c38ccd38ad7a3a ] do/while_each_thread should be avoided when possible. Plus this change allows to avoid lock_task_sighand(), we can use rcu and/or sig->stats_lock instead. Link: https://lkml.kernel.org/r/20230909172629.GA20454@redhat.com Signed-off-by: Oleg Nesterov Cc: Eric W. Biederman Signed-off-by: Andrew Morton Stable-dep-of: f7ec1cd5cc7e ("getrusage: use sig->stats_lock rather than lock_task_sighand()") Signed-off-by: Sasha Levin --- kernel/sys.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 26c8783bd075..f1ae8fa62714 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1780,10 +1780,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; - t = p; - do { + __for_each_thread(sig, t) accumulate_thread_rusage(t, r); - } while_each_thread(p, t); break; default: From f610023e67ecb12a314ca60c431bdba2ea203a39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Jan 2024 16:50:53 +0100 Subject: [PATCH 63/71] getrusage: use sig->stats_lock rather than lock_task_sighand() [ Upstream commit f7ec1cd5cc7ef3ad964b677ba82b8b77f1c93009 ] lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call getrusage() at the same time and the process has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS * NR_THREADS) time. Change getrusage() to use sig->stats_lock, it was specifically designed for this type of use. This way it runs lockless in the likely case. TODO: - Change do_task_stat() to use sig->stats_lock too, then we can remove spin_lock_irq(siglock) in wait_task_zombie(). - Turn sig->stats_lock into seqcount_rwlock_t, this way the readers in the slow mode won't exclude each other. See https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/ - stats_lock has to disable irqs because ->siglock can be taken in irq context, it would be very nice to change __exit_signal() to avoid the siglock->stats_lock dependency. Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com Signed-off-by: Oleg Nesterov Reported-by: Dylan Hatch Tested-by: Dylan Hatch Cc: Eric W. 
Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- kernel/sys.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index f1ae8fa62714..efc213ae4c5a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1739,7 +1739,9 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) unsigned long maxrss; struct mm_struct *mm; struct signal_struct *sig = p->signal; + unsigned int seq = 0; +retry: memset(r, 0, sizeof(*r)); utime = stime = 0; maxrss = 0; @@ -1751,8 +1753,7 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) goto out_thread; } - if (!lock_task_sighand(p, &flags)) - return; + flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: @@ -1780,14 +1781,23 @@ void getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; + + rcu_read_lock(); __for_each_thread(sig, t) accumulate_thread_rusage(t, r); + rcu_read_unlock(); + break; default: BUG(); } - unlock_task_sighand(p, &flags); + + if (need_seqretry(&sig->stats_lock, seq)) { + seq = 1; + goto retry; + } + done_seqretry_irqrestore(&sig->stats_lock, seq, flags); if (who == RUSAGE_CHILDREN) goto out_children; From 74d83d0fe04e08fd3389ca201d0f885b3e3fed3c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 25 Jun 2021 18:37:33 +0300 Subject: [PATCH 64/71] serial: max310x: Unprepare and disable clock in error path [ Upstream commit 61acabaae5ba58b3c32e6e90d24c2c0827fd27a8 ] In one error case the clock may be left prepared and enabled. Unprepare and disable clock in that case to balance state of the hardware. Fixes: d4d6f03c4fb3 ("serial: max310x: Try to get crystal clock rate from property") Reported-by: Dan Carpenter Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210625153733.12911-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 978d9d93127e..a09ec46e0310 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1293,7 +1293,8 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty freq = uartclk; if (freq == 0) { dev_err(dev, "Cannot get clock rate\n"); - return -EINVAL; + ret = -EINVAL; + goto out_clk; } if (xtal) { From 82a62478b9f75ed208c4f732d210498a7d3fbfd1 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 1 Mar 2021 20:13:48 +0100 Subject: [PATCH 65/71] Drivers: hv: vmbus: Drop error message when 'No request id available' [ Upstream commit 0c85c54bf7faeb80c6b76901ed77d93acef0207d ] Running out of request IDs on a channel essentially produces the same effect as running out of space in the ring buffer, in that -EAGAIN is returned. The error message in hv_ringbuffer_write() should either be dropped (since we don't output a message when the ring buffer is full) or be made conditional/debug-only. 
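To illustrate why the message carries no information for callers (a hedged sketch, not a specific in-tree caller): whether the requestor is exhausted or the ring buffer is full, the send path surfaces the same -EAGAIN, which callers already treat as "busy, retry later":

    /* sketch only; assumes <linux/hyperv.h> */
    static int example_send(struct vmbus_channel *chan, void *buf, u32 len,
                            u64 req_id)
    {
            int ret;

            ret = vmbus_sendpacket(chan, buf, len, req_id,
                                   VM_PKT_DATA_INBAND, 0);
            if (ret == -EAGAIN) {
                    /* no request id *or* no ring-buffer space: the same
                     * transient condition, so just requeue and retry later */
                    return ret;
            }

            return ret;     /* 0 on success, other errors are real failures */
    }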
Suggested-by: Michael Kelley Signed-off-by: Andrea Parri (Microsoft) Fixes: e8b7db38449ac ("Drivers: hv: vmbus: Add vmbus_requestor data structure for VMBus hardening") Link: https://lore.kernel.org/r/20210301191348.196485-1-parri.andrea@gmail.com Signed-off-by: Wei Liu Signed-off-by: Sasha Levin --- drivers/hv/ring_buffer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index a0ba6ac48736..a49cc69c56af 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -326,7 +326,6 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, rqst_id = vmbus_next_request_id(&channel->requestor, requestid); if (rqst_id == VMBUS_RQST_ERROR) { spin_unlock_irqrestore(&outring_info->ring_lock, flags); - pr_err("No request id available\n"); return -EAGAIN; } } From 915848be2f1b24d8043aace414bc5f8174a13c0e Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Thu, 4 Nov 2021 16:00:40 +0100 Subject: [PATCH 66/71] regmap: allow to define reg_update_bits for no bus configuration [ Upstream commit 02d6fdecb9c38de19065f6bed8d5214556fd061d ] Some device requires a special handling for reg_update_bits and can't use the normal regmap read write logic. An example is when locking is handled by the device and rmw operations requires to do atomic operations. Allow to declare a dedicated function in regmap_config for reg_update_bits in no bus configuration. Signed-off-by: Ansuel Smith Link: https://lore.kernel.org/r/20211104150040.1260-1-ansuelsmth@gmail.com Signed-off-by: Mark Brown Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") Signed-off-by: Sasha Levin --- drivers/base/regmap/regmap.c | 1 + include/linux/regmap.h | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 7bc603145bd9..8f39aacdad0d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -842,6 +842,7 @@ struct regmap *__regmap_init(struct device *dev, if (!bus) { map->reg_read = config->reg_read; map->reg_write = config->reg_write; + map->reg_update_bits = config->reg_update_bits; map->defer_caching = false; goto skip_format_initialization; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e7834d98207f..d6f0d876fa42 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -289,6 +289,11 @@ typedef void (*regmap_unlock)(void *); * read operation on a bus such as SPI, I2C, etc. Most of the * devices do not need this. * @reg_write: Same as above for writing. + * @reg_update_bits: Optional callback that if filled will be used to perform + * all the update_bits(rmw) operation. Should only be provided + * if the function require special handling with lock and reg + * handling and the operation cannot be represented as a simple + * update_bits operation on a bus such as SPI, I2C, etc. * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of struct regmap_config). 
@@ -366,6 +371,8 @@ struct regmap_config { int (*reg_read)(void *context, unsigned int reg, unsigned int *val); int (*reg_write)(void *context, unsigned int reg, unsigned int val); + int (*reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); bool fast_io; From f36ef837a7a6978e652814fe6e4863e7dee35b04 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sat, 30 Apr 2022 04:51:44 +0200 Subject: [PATCH 67/71] regmap: Add bulk read/write callbacks into regmap_config [ Upstream commit d77e745613680c54708470402e2b623dcd769681 ] Currently the regmap_config structure only allows the user to implement single element register read/write using .reg_read/.reg_write callbacks. The regmap_bus already implements bulk counterparts of both, and is being misused as a workaround for the missing bulk read/write callbacks in regmap_config by a couple of drivers. To stop this misuse, add the bulk read/write callbacks to regmap_config and call them from the regmap core code. Signed-off-by: Marek Vasut Cc: Jagan Teki Cc: Mark Brown Cc: Maxime Ripard Cc: Robert Foss Cc: Sam Ravnborg Cc: Thomas Zimmermann To: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/r/20220430025145.640305-1-marex@denx.de Signed-off-by: Mark Brown Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") Signed-off-by: Sasha Levin --- drivers/base/regmap/internal.h | 4 ++ drivers/base/regmap/regmap.c | 76 ++++++++++++++++++---------------- include/linux/regmap.h | 12 ++++++ 3 files changed, 56 insertions(+), 36 deletions(-) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 0097696c31de..2720d8d7bbfc 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -104,6 +104,10 @@ struct regmap { int (*reg_write)(void *context, unsigned int reg, unsigned int val); int (*reg_update_bits)(void *context, unsigned int reg, unsigned int mask, unsigned int val); + /* Bulk read/write */ + int (*read)(void *context, const void *reg_buf, size_t reg_size, + void *val_buf, size_t val_size); + int (*write)(void *context, const void *data, size_t count); bool defer_caching; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 8f39aacdad0d..2dfd6aa60045 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -800,12 +800,15 @@ struct regmap *__regmap_init(struct device *dev, map->reg_stride_order = ilog2(map->reg_stride); else map->reg_stride_order = -1; - map->use_single_read = config->use_single_read || !bus || !bus->read; - map->use_single_write = config->use_single_write || !bus || !bus->write; - map->can_multi_write = config->can_multi_write && bus && bus->write; + map->use_single_read = config->use_single_read || !(config->read || (bus && bus->read)); + map->use_single_write = config->use_single_write || !(config->write || (bus && bus->write)); + map->can_multi_write = config->can_multi_write && (config->write || (bus && bus->write)); if (bus) { map->max_raw_read = bus->max_raw_read; map->max_raw_write = bus->max_raw_write; + } else if (config->max_raw_read && config->max_raw_write) { + map->max_raw_read = config->max_raw_read; + map->max_raw_write = config->max_raw_write; } map->dev = dev; map->bus = bus; @@ -839,7 +842,16 @@ struct regmap *__regmap_init(struct device *dev, map->read_flag_mask = bus->read_flag_mask; } - if (!bus) { + if (config && config->read && config->write) { + map->reg_read = _regmap_bus_read; + + /* Bulk read/write */ + map->read = 
config->read; + map->write = config->write; + + reg_endian = REGMAP_ENDIAN_NATIVE; + val_endian = REGMAP_ENDIAN_NATIVE; + } else if (!bus) { map->reg_read = config->reg_read; map->reg_write = config->reg_write; map->reg_update_bits = config->reg_update_bits; @@ -856,10 +868,13 @@ struct regmap *__regmap_init(struct device *dev, } else { map->reg_read = _regmap_bus_read; map->reg_update_bits = bus->reg_update_bits; - } + /* Bulk read/write */ + map->read = bus->read; + map->write = bus->write; - reg_endian = regmap_get_reg_endian(bus, config); - val_endian = regmap_get_val_endian(dev, bus, config); + reg_endian = regmap_get_reg_endian(bus, config); + val_endian = regmap_get_val_endian(dev, bus, config); + } switch (config->reg_bits + map->reg_shift) { case 2: @@ -1628,8 +1643,6 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, size_t len; int i; - WARN_ON(!map->bus); - /* Check for unwritable or noinc registers in range * before we start */ @@ -1711,7 +1724,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, val = work_val; } - if (map->async && map->bus->async_write) { + if (map->async && map->bus && map->bus->async_write) { struct regmap_async *async; trace_regmap_async_write_start(map, reg, val_len); @@ -1779,10 +1792,10 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, * write. */ if (val == work_val) - ret = map->bus->write(map->bus_context, map->work_buf, - map->format.reg_bytes + - map->format.pad_bytes + - val_len); + ret = map->write(map->bus_context, map->work_buf, + map->format.reg_bytes + + map->format.pad_bytes + + val_len); else if (map->bus->gather_write) ret = map->bus->gather_write(map->bus_context, map->work_buf, map->format.reg_bytes + @@ -1801,7 +1814,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, memcpy(buf, map->work_buf, map->format.reg_bytes); memcpy(buf + map->format.reg_bytes + map->format.pad_bytes, val, val_len); - ret = map->bus->write(map->bus_context, buf, len); + ret = map->write(map->bus_context, buf, len); kfree(buf); } else if (ret != 0 && !map->cache_bypass && map->format.parse_val) { @@ -1858,7 +1871,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, struct regmap_range_node *range; struct regmap *map = context; - WARN_ON(!map->bus || !map->format.format_write); + WARN_ON(!map->format.format_write); range = _regmap_range_lookup(map, reg); if (range) { @@ -1871,8 +1884,7 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, trace_regmap_hw_write_start(map, reg, 1); - ret = map->bus->write(map->bus_context, map->work_buf, - map->format.buf_size); + ret = map->write(map->bus_context, map->work_buf, map->format.buf_size); trace_regmap_hw_write_done(map, reg, 1); @@ -1892,7 +1904,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, { struct regmap *map = context; - WARN_ON(!map->bus || !map->format.format_val); + WARN_ON(!map->format.format_val); map->format.format_val(map->work_buf + map->format.reg_bytes + map->format.pad_bytes, val, 0); @@ -1906,7 +1918,7 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, static inline void *_regmap_map_get_context(struct regmap *map) { - return (map->bus) ? map : map->bus_context; + return (map->bus || (!map->bus && map->read)) ? 
map : map->bus_context; } int _regmap_write(struct regmap *map, unsigned int reg, @@ -2313,7 +2325,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, u8 = buf; *u8 |= map->write_flag_mask; - ret = map->bus->write(map->bus_context, buf, len); + ret = map->write(map->bus_context, buf, len); kfree(buf); @@ -2619,9 +2631,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, struct regmap_range_node *range; int ret; - WARN_ON(!map->bus); - - if (!map->bus || !map->bus->read) + if (!map->read) return -EINVAL; range = _regmap_range_lookup(map, reg); @@ -2637,9 +2647,9 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, map->read_flag_mask); trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); - ret = map->bus->read(map->bus_context, map->work_buf, - map->format.reg_bytes + map->format.pad_bytes, - val, val_len); + ret = map->read(map->bus_context, map->work_buf, + map->format.reg_bytes + map->format.pad_bytes, + val, val_len); trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); @@ -2750,8 +2760,6 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, unsigned int v; int ret, i; - if (!map->bus) - return -EINVAL; if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2766,7 +2774,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, size_t chunk_count, chunk_bytes; size_t chunk_regs = val_count; - if (!map->bus->read) { + if (!map->read) { ret = -ENOTSUPP; goto out; } @@ -2826,7 +2834,7 @@ EXPORT_SYMBOL_GPL(regmap_raw_read); * @val: Pointer to data buffer * @val_len: Length of output buffer in bytes. * - * The regmap API usually assumes that bulk bus read operations will read a + * The regmap API usually assumes that bulk read operations will read a * range of registers. Some devices have certain registers for which a read * operation read will read from an internal FIFO. * @@ -2844,10 +2852,6 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, size_t read_len; int ret; - if (!map->bus) - return -EINVAL; - if (!map->bus->read) - return -ENOTSUPP; if (val_len % map->format.val_bytes) return -EINVAL; if (!IS_ALIGNED(reg, map->reg_stride)) @@ -2961,7 +2965,7 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, if (val_count == 0) return -EINVAL; - if (map->bus && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { + if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) { ret = regmap_raw_read(map, reg, val, val_bytes * val_count); if (ret != 0) return ret; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d6f0d876fa42..83a7485de78f 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -294,6 +294,12 @@ typedef void (*regmap_unlock)(void *); * if the function require special handling with lock and reg * handling and the operation cannot be represented as a simple * update_bits operation on a bus such as SPI, I2C, etc. + * @read: Optional callback that if filled will be used to perform all the + * bulk reads from the registers. Data is returned in the buffer used + * to transmit data. + * @write: Same as above for writing. + * @max_raw_read: Max raw read size that can be used on the device. + * @max_raw_write: Max raw write size that can be used on the device. * @fast_io: Register IO is fast. Use a spinlock instead of a mutex * to perform locking. 
This field is ignored if custom lock/unlock * functions are used (see fields lock/unlock of struct regmap_config). @@ -373,6 +379,12 @@ struct regmap_config { int (*reg_write)(void *context, unsigned int reg, unsigned int val); int (*reg_update_bits)(void *context, unsigned int reg, unsigned int mask, unsigned int val); + /* Bulk read/write */ + int (*read)(void *context, const void *reg_buf, size_t reg_size, + void *val_buf, size_t val_size); + int (*write)(void *context, const void *data, size_t count); + size_t max_raw_read; + size_t max_raw_write; bool fast_io; From 8082cc992dec60342c99078f500b0ca67fb33997 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Sun, 5 Jun 2022 17:46:58 +0300 Subject: [PATCH 68/71] serial: max310x: make accessing revision id interface-agnostic [ Upstream commit b3883ab5e95713e479f774ea68be275413e8e5b2 ] SPI can only use 5 address bits, since one bit is reserved for specifying R/W and 2 bits are used to specify the UART port. To access registers that have addresses past 0x1F, an extended register space can be enabled by writing to the GlobalCommand register (address 0x1F). I2C uses 8 address bits. The R/W bit is placed in the slave address, and so is the UART port. Because of this, registers that have addresses higher than 0x1F can be accessed normally. To access the RevID register, on SPI, 0xCE must be written to the 0x1F address to enable the extended register space, after which the RevID register is accessible at address 0x5. 0xCD must be written to the 0x1F address to disable the extended register space. On I2C, the RevID register is accessible at address 0x25. Create an interface config struct, and add a method for toggling the extended register space and a member for the RevId register address. Implement these for SPI. 
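In outline, the SPI-only access sequence described above looks like the following sketch. It reuses the register macros already present in the driver and is an illustration of the sequence, not the patch's final helper:

    static int max310x_spi_read_revid(struct regmap *regmap, unsigned int *revid)
    {
            int ret;

            /* write 0xCE to address 0x1F: enable the extended register space */
            ret = regmap_write(regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_ENBL);
            if (ret)
                    return ret;

            /* the RevID register is now visible at address 0x05 */
            ret = regmap_read(regmap, MAX310X_SPI_REVID_EXTREG, revid);

            /* write 0xCD to address 0x1F: back to the normal register map */
            regmap_write(regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL);

            return ret;
    }

On I2C none of this is needed: the RevID register is directly addressable at 0x25, which is why the interface config only needs to carry the RevID register address and a toggle callback that can be a no-op.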
Reviewed-by: Andy Shevchenko Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220605144659.4169853-4-demonsingur@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 40 +++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index a09ec46e0310..b90281ac54c8 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -72,7 +72,7 @@ #define MAX310X_GLOBALCMD_REG MAX310X_REG_1F /* Global Command (WO) */ /* Extended registers */ -#define MAX310X_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ +#define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ /* IRQ register bits */ #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */ @@ -253,6 +253,12 @@ #define MAX14830_BRGCFG_CLKDIS_BIT (1 << 6) /* Clock Disable */ #define MAX14830_REV_ID (0xb0) +struct max310x_if_cfg { + int (*extended_reg_enable)(struct device *dev, bool enable); + + unsigned int rev_id_reg; +}; + struct max310x_devtype { char name[9]; int nr; @@ -275,6 +281,7 @@ struct max310x_one { struct max310x_port { const struct max310x_devtype *devtype; + const struct max310x_if_cfg *if_cfg; struct regmap *regmap; struct clk *clk; #ifdef CONFIG_GPIOLIB @@ -364,13 +371,12 @@ static int max3109_detect(struct device *dev) unsigned int val = 0; int ret; - ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, - MAX310X_EXTREG_ENBL); + ret = s->if_cfg->extended_reg_enable(dev, true); if (ret) return ret; - regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val); - regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL); + regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val); + s->if_cfg->extended_reg_enable(dev, false); if (((val & MAX310x_REV_MASK) != MAX3109_REV_ID)) { dev_err(dev, "%s ID 0x%02x does not match\n", s->devtype->name, val); @@ -395,13 +401,12 @@ static int max14830_detect(struct device *dev) unsigned int val = 0; int ret; - ret = regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, - MAX310X_EXTREG_ENBL); + ret = s->if_cfg->extended_reg_enable(dev, true); if (ret) return ret; - regmap_read(s->regmap, MAX310X_REVID_EXTREG, &val); - regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, MAX310X_EXTREG_DSBL); + regmap_read(s->regmap, s->if_cfg->rev_id_reg, &val); + s->if_cfg->extended_reg_enable(dev, false); if (((val & MAX310x_REV_MASK) != MAX14830_REV_ID)) { dev_err(dev, "%s ID 0x%02x does not match\n", s->devtype->name, val); @@ -1250,6 +1255,7 @@ static int max310x_gpio_set_config(struct gpio_chip *chip, unsigned int offset, #endif static int max310x_probe(struct device *dev, const struct max310x_devtype *devtype, + const struct max310x_if_cfg *if_cfg, struct regmap *regmaps[], int irq) { int i, ret, fmin, fmax, freq; @@ -1313,6 +1319,7 @@ static int max310x_probe(struct device *dev, const struct max310x_devtype *devty s->regmap = regmaps[0]; s->devtype = devtype; + s->if_cfg = if_cfg; dev_set_drvdata(dev, s); /* Check device to ensure we are talking to what we expect */ @@ -1482,6 +1489,19 @@ static struct regmap_config regcfg = { }; #ifdef CONFIG_SPI_MASTER +static int max310x_spi_extended_reg_enable(struct device *dev, bool enable) +{ + struct max310x_port *s = dev_get_drvdata(dev); + + return regmap_write(s->regmap, MAX310X_GLOBALCMD_REG, + enable ? 
MAX310X_EXTREG_ENBL : MAX310X_EXTREG_DSBL); +} + +static const struct max310x_if_cfg __maybe_unused max310x_spi_if_cfg = { + .extended_reg_enable = max310x_spi_extended_reg_enable, + .rev_id_reg = MAX310X_SPI_REVID_EXTREG, +}; + static int max310x_spi_probe(struct spi_device *spi) { const struct max310x_devtype *devtype; @@ -1508,7 +1528,7 @@ static int max310x_spi_probe(struct spi_device *spi) regmaps[i] = devm_regmap_init_spi(spi, ®cfg); } - return max310x_probe(&spi->dev, devtype, regmaps, spi->irq); + return max310x_probe(&spi->dev, devtype, &max310x_spi_if_cfg, regmaps, spi->irq); } static int max310x_spi_remove(struct spi_device *spi) From 85d79478710ad2cbf11857aec107084a7104943e Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Sun, 5 Jun 2022 17:46:59 +0300 Subject: [PATCH 69/71] serial: max310x: implement I2C support [ Upstream commit 2e1f2d9a9bdbe12ee475c82a45ac46a278e8049a ] I2C implementation on this chip has a few key differences compared to SPI, as described in previous patches. * extended register space access needs no extra logic * slave address is used to select which UART to communicate with To accommodate these differences, add an I2C interface config, set the RevID register address and implement an empty method for setting the GlobalCommand register, since no special handling is needed for the extended register space. To handle the port-specific slave address, create an I2C dummy device for each port, except the base one (UART0), which is expected to be the one specified in firmware, and create a regmap for each I2C device. Add minimum and maximum slave addresses to each devtype for sanity checking. Also, use a separate regmap config with no write_flag_mask, since I2C has a R/W bit in its slave address, and set the max register to the address of the RevID register, since the extended register space needs no extra logic. Finally, add the I2C driver. Reviewed-by: Andy Shevchenko Signed-off-by: Cosmin Tanislav Link: https://lore.kernel.org/r/20220605144659.4169853-5-demonsingur@gmail.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 3f42b142ea11 ("serial: max310x: fix IO data corruption in batched operations") Signed-off-by: Sasha Levin --- drivers/tty/serial/Kconfig | 1 + drivers/tty/serial/max310x.c | 135 ++++++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 28f22e58639c..bd30ae9751bf 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -343,6 +343,7 @@ config SERIAL_MAX310X depends on SPI_MASTER select SERIAL_CORE select REGMAP_SPI if SPI_MASTER + select REGMAP_I2C if I2C help This selects support for an advanced UART from Maxim (Dallas). Supported ICs are MAX3107, MAX3108, MAX3109, MAX14830. 
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index b90281ac54c8..ed1aaa19854f 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ /* Extended registers */ #define MAX310X_SPI_REVID_EXTREG MAX310X_REG_05 /* Revision ID */ +#define MAX310X_I2C_REVID_EXTREG (0x25) /* Revision ID */ /* IRQ register bits */ #define MAX310X_IRQ_LSR_BIT (1 << 0) /* LSR interrupt */ @@ -260,6 +262,10 @@ struct max310x_if_cfg { }; struct max310x_devtype { + struct { + unsigned short min; + unsigned short max; + } slave_addr; char name[9]; int nr; u8 mode1; @@ -431,6 +437,10 @@ static const struct max310x_devtype max3107_devtype = { .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT | MAX310X_MODE1_IRQSEL_BIT, .detect = max3107_detect, .power = max310x_power, + .slave_addr = { + .min = 0x2c, + .max = 0x2f, + }, }; static const struct max310x_devtype max3108_devtype = { @@ -439,6 +449,10 @@ static const struct max310x_devtype max3108_devtype = { .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT, .detect = max3108_detect, .power = max310x_power, + .slave_addr = { + .min = 0x60, + .max = 0x6f, + }, }; static const struct max310x_devtype max3109_devtype = { @@ -447,6 +461,10 @@ static const struct max310x_devtype max3109_devtype = { .mode1 = MAX310X_MODE1_AUTOSLEEP_BIT, .detect = max3109_detect, .power = max310x_power, + .slave_addr = { + .min = 0x60, + .max = 0x6f, + }, }; static const struct max310x_devtype max14830_devtype = { @@ -455,6 +473,10 @@ static const struct max310x_devtype max14830_devtype = { .mode1 = MAX310X_MODE1_IRQSEL_BIT, .detect = max14830_detect, .power = max14830_power, + .slave_addr = { + .min = 0x60, + .max = 0x6f, + }, }; static bool max310x_reg_writeable(struct device *dev, unsigned int reg) @@ -1557,6 +1579,97 @@ static struct spi_driver max310x_spi_driver = { }; #endif +#ifdef CONFIG_I2C +static int max310x_i2c_extended_reg_enable(struct device *dev, bool enable) +{ + return 0; +} + +static struct regmap_config regcfg_i2c = { + .reg_bits = 8, + .val_bits = 8, + .cache_type = REGCACHE_RBTREE, + .writeable_reg = max310x_reg_writeable, + .volatile_reg = max310x_reg_volatile, + .precious_reg = max310x_reg_precious, + .max_register = MAX310X_I2C_REVID_EXTREG, +}; + +static const struct max310x_if_cfg max310x_i2c_if_cfg = { + .extended_reg_enable = max310x_i2c_extended_reg_enable, + .rev_id_reg = MAX310X_I2C_REVID_EXTREG, +}; + +static unsigned short max310x_i2c_slave_addr(unsigned short addr, + unsigned int nr) +{ + /* + * For MAX14830 and MAX3109, the slave address depends on what the + * A0 and A1 pins are tied to. + * See Table I2C Address Map of the datasheet. + * Based on that table, the following formulas were determined. 
+ * UART1 - UART0 = 0x10 + * UART2 - UART1 = 0x20 + 0x10 + * UART3 - UART2 = 0x10 + */ + + addr -= nr * 0x10; + + if (nr >= 2) + addr -= 0x20; + + return addr; +} + +static int max310x_i2c_probe(struct i2c_client *client) +{ + const struct max310x_devtype *devtype = + device_get_match_data(&client->dev); + struct i2c_client *port_client; + struct regmap *regmaps[4]; + unsigned int i; + u8 port_addr; + + if (client->addr < devtype->slave_addr.min || + client->addr > devtype->slave_addr.max) + return dev_err_probe(&client->dev, -EINVAL, + "Slave addr 0x%x outside of range [0x%x, 0x%x]\n", + client->addr, devtype->slave_addr.min, + devtype->slave_addr.max); + + regmaps[0] = devm_regmap_init_i2c(client, ®cfg_i2c); + + for (i = 1; i < devtype->nr; i++) { + port_addr = max310x_i2c_slave_addr(client->addr, i); + port_client = devm_i2c_new_dummy_device(&client->dev, + client->adapter, + port_addr); + + regmaps[i] = devm_regmap_init_i2c(port_client, ®cfg_i2c); + } + + return max310x_probe(&client->dev, devtype, &max310x_i2c_if_cfg, + regmaps, client->irq); +} + +static int max310x_i2c_remove(struct i2c_client *client) +{ + max310x_remove(&client->dev); + + return 0; +} + +static struct i2c_driver max310x_i2c_driver = { + .driver = { + .name = MAX310X_NAME, + .of_match_table = max310x_dt_ids, + .pm = &max310x_pm_ops, + }, + .probe_new = max310x_i2c_probe, + .remove = max310x_i2c_remove, +}; +#endif + static int __init max310x_uart_init(void) { int ret; @@ -1570,15 +1683,35 @@ static int __init max310x_uart_init(void) #ifdef CONFIG_SPI_MASTER ret = spi_register_driver(&max310x_spi_driver); if (ret) - uart_unregister_driver(&max310x_uart); + goto err_spi_register; #endif +#ifdef CONFIG_I2C + ret = i2c_add_driver(&max310x_i2c_driver); + if (ret) + goto err_i2c_register; +#endif + + return 0; + +#ifdef CONFIG_I2C +err_i2c_register: + spi_unregister_driver(&max310x_spi_driver); +#endif + +err_spi_register: + uart_unregister_driver(&max310x_uart); + return ret; } module_init(max310x_uart_init); static void __exit max310x_uart_exit(void) { +#ifdef CONFIG_I2C + i2c_del_driver(&max310x_i2c_driver); +#endif + #ifdef CONFIG_SPI_MASTER spi_unregister_driver(&max310x_spi_driver); #endif From 738845b022d3244b36ab63e76968fe25c5c78ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= Date: Wed, 5 Apr 2023 22:14:23 +0200 Subject: [PATCH 70/71] serial: max310x: fix IO data corruption in batched operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3f42b142ea1171967e40e10e4b0241c0d6d28d41 ] After upgrading from 5.16 to 6.1, our board with a MAX14830 started producing lots of garbage data over UART. Bisection pointed out commit 285e76fc049c as the culprit. That patch tried to replace hand-written code which I added in 2b4bac48c1084 ("serial: max310x: Use batched reads when reasonably safe") with the generic regmap infrastructure for batched operations. Unfortunately, the `regmap_raw_read` and `regmap_raw_write` which were used are actually functions which perform IO over *multiple* registers. That's not what is needed for accessing these Tx/Rx FIFOs; the appropriate functions are the `_noinc_` versions, not the `_raw_` ones. Fix this regression by using `regmap_noinc_read()` and `regmap_noinc_write()` along with the necessary `regmap_config` setup; with this patch in place, our board communicates happily again. Since our board uses SPI for talking to this chip, the I2C part is completely untested. 
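The distinction at the heart of the fix, as a short sketch (it assumes the driver's MAX310X_RHR_REG macro and the struct max310x_one wrapper used elsewhere in this series):

    /* regmap_raw_read() is a multi-register accessor: it fills the buffer
     * from reg, reg+1, reg+2, ..., which scrambles the stream when "reg" is
     * in fact a FIFO behind a single address.  regmap_noinc_read() re-reads
     * the same register for every byte, which is what the Rx FIFO needs. */
    static void max310x_batch_read_sketch(struct max310x_one *one,
                                          u8 *rxbuf, unsigned int len)
    {
            /* wrong for a FIFO (the 285e76fc049c regression):
             *   regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
             */
            regmap_noinc_read(one->regmap, MAX310X_RHR_REG, rxbuf, len);
    }

The regmap_config additions in the hunks below (writeable_noinc_reg/readable_noinc_reg plus max_raw_read/max_raw_write) are what make the _noinc_ accessors usable for the FIFO registers.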
Fixes: 285e76fc049c ("serial: max310x: use regmap methods for SPI batch operations") Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: Jan Kundrát Link: https://lore.kernel.org/r/79db8e82aadb0e174bc82b9996423c3503c8fb37.1680732084.git.jan.kundrat@cesnet.cz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/max310x.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index ed1aaa19854f..2f88eae8a55a 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -533,6 +533,11 @@ static bool max310x_reg_precious(struct device *dev, unsigned int reg) return false; } +static bool max310x_reg_noinc(struct device *dev, unsigned int reg) +{ + return reg == MAX310X_RHR_REG; +} + static int max310x_set_baud(struct uart_port *port, int baud) { unsigned int mode = 0, div = 0, frac = 0, c = 0, F = 0; @@ -667,14 +672,14 @@ static void max310x_batch_write(struct uart_port *port, u8 *txbuf, unsigned int { struct max310x_one *one = to_max310x_port(port); - regmap_raw_write(one->regmap, MAX310X_THR_REG, txbuf, len); + regmap_noinc_write(one->regmap, MAX310X_THR_REG, txbuf, len); } static void max310x_batch_read(struct uart_port *port, u8 *rxbuf, unsigned int len) { struct max310x_one *one = to_max310x_port(port); - regmap_raw_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); + regmap_noinc_read(one->regmap, MAX310X_RHR_REG, rxbuf, len); } static void max310x_handle_rx(struct uart_port *port, unsigned int rxlen) @@ -1508,6 +1513,10 @@ static struct regmap_config regcfg = { .writeable_reg = max310x_reg_writeable, .volatile_reg = max310x_reg_volatile, .precious_reg = max310x_reg_precious, + .writeable_noinc_reg = max310x_reg_noinc, + .readable_noinc_reg = max310x_reg_noinc, + .max_raw_read = MAX310X_FIFO_SIZE, + .max_raw_write = MAX310X_FIFO_SIZE, }; #ifdef CONFIG_SPI_MASTER @@ -1593,6 +1602,10 @@ static struct regmap_config regcfg_i2c = { .volatile_reg = max310x_reg_volatile, .precious_reg = max310x_reg_precious, .max_register = MAX310X_I2C_REVID_EXTREG, + .writeable_noinc_reg = max310x_reg_noinc, + .readable_noinc_reg = max310x_reg_noinc, + .max_raw_read = MAX310X_FIFO_SIZE, + .max_raw_write = MAX310X_FIFO_SIZE, }; static const struct max310x_if_cfg max310x_i2c_if_cfg = { From d35f38551c217d32daa0b7f5008adf1d21226042 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 13 Mar 2024 07:43:30 -0400 Subject: [PATCH 71/71] Linux 5.10.213 Tested-by: Pavel Machek (CIP) Tested-by: Dominique Martinet Tested-by: Linux Kernel Functional Testing Tested-by: Florian Fainelli Tested-by: kernelci.org bot Signed-off-by: Sasha Levin --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d7ec0be4cd79..b6af62d53d7a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 212 +SUBLEVEL = 213 EXTRAVERSION = NAME = Dare mighty things