From c17e5cbbb180bf45c415dc2f0c9a89dfdf9bf512 Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Wed, 18 Sep 2024 20:05:57 -0700 Subject: [PATCH 01/32] RDMA/bnxt_re: Fix incorrect AVID type in WQE structure [ Upstream commit 9ab20f76ae9fad55ebaf36bdff04aea1c2552374 ] Driver uses internal data structure to construct WQE frame. It used avid type as u16 which can accommodate up to 64K AVs. When outstanding AVID crosses 64K, driver truncates AVID and hence it uses incorrect AVID to WR. This leads to WR failure due to invalid AV ID and QP is moved to error state with reason set to 19 (INVALID AVID). When RDMA CM path is used, this issue hits QP1 and it is moved to error state Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Link: https://patch.msgid.link/r/1726715161-18941-3-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Selvin Xavier Reviewed-by: Chandramohan Akula Signed-off-by: Saravanan Vajravel Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_fp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index f112f013df7d..01cb48caa9db 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -167,7 +167,7 @@ struct bnxt_qplib_swqe { }; u32 q_key; u32 dst_qp; - u16 avid; + u32 avid; } send; /* Send Raw Ethernet and QP1 */ From dbe51dd516e6d4e655f31c8a1cbc050dde7ba97b Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 18 Sep 2024 20:05:58 -0700 Subject: [PATCH 02/32] RDMA/bnxt_re: Add a check for memory allocation [ Upstream commit c5c1ae73b7741fa3b58e6e001b407825bb971225 ] __alloc_pbl() can return error when memory allocation fails. Driver is not checking the status on one of the instances. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Link: https://patch.msgid.link/r/1726715161-18941-4-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Selvin Xavier Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 123ea759f282..2861a2bbea6e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -243,6 +243,8 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, sginfo.pgsize = npde * pg_size; sginfo.npages = 1; rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo); + if (rc) + goto fail; /* Alloc PBL pages */ sginfo.npages = npbl; From 9076d449e77be6d34b11411d6a8178cb7f95c915 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 16 Jul 2024 02:03:11 +0300 Subject: [PATCH 03/32] ARM: dts: bcm2837-rpi-cm3-io3: Fix HDMI hpd-gpio pin [ Upstream commit dc7785e4723510616d776862ddb4c08857a1bdb2 ] HDMI_HPD_N_1V8 is connected to GPIO pin 0, not 1. This fixes HDMI hotplug/output detection. See https://datasheets.raspberrypi.com/cm/cm3-schematics.pdf Signed-off-by: Florian Klink Reviewed-by: Stefan Wahren Link: https://lore.kernel.org/r/20240715230311.685641-1-flokli@flokli.de Reviewed-by: Stefan Wahren Fixes: a54fe8a6cf66 ("ARM: dts: add Raspberry Pi Compute Module 3 and IO board") Signed-off-by: Florian Fainelli Signed-off-by: Sasha Levin --- arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts index 3dfce4312dfc..a2ef43c2105a 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts @@ -77,7 +77,7 @@ &gpio { }; &hdmi { - hpd-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; + hpd-gpios = <&expgpio 0 GPIO_ACTIVE_LOW>; power-domains = <&power RPI_POWER_DOMAIN_HDMI>; status = "okay"; }; From 59df170bdec9e08b9b4486c88fe6d09ff94bc6f9 Mon Sep 17 00:00:00 2001 From: Anumula Murali Mohan Reddy Date: Mon, 7 Oct 2024 18:53:11 +0530 Subject: [PATCH 04/32] RDMA/cxgb4: Fix RDMA_CM_EVENT_UNREACHABLE error for iWARP [ Upstream commit c659b405b82ead335bee6eb33f9691bf718e21e8 ] ip_dev_find() always returns real net_device address, whether traffic is running on a vlan or real device, if traffic is over vlan, filling endpoint struture with real ndev and an attempt to send a connect request will results in RDMA_CM_EVENT_UNREACHABLE error. This patch fixes the issue by using vlan_dev_real_dev(). Fixes: 830662f6f032 ("RDMA/cxgb4: Add support for active and passive open connection with IPv6 address") Link: https://patch.msgid.link/r/20241007132311.70593-1-anumula@chelsio.com Signed-off-by: Anumula Murali Mohan Reddy Signed-off-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/cxgb4/cm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 95300b2e1ffe..b607c1782738 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2086,7 +2086,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENOMEM; if (n->dev->flags & IFF_LOOPBACK) { if (iptype == 4) - pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + pdev = __ip_dev_find(&init_net, *(__be32 *)peer_ip, false); else if (IS_ENABLED(CONFIG_IPV6)) for_each_netdev(&init_net, pdev) { if (ipv6_chk_addr(&init_net, @@ -2101,12 +2101,12 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, err = -ENODEV; goto out; } + if (is_vlan_dev(pdev)) + pdev = vlan_dev_real_dev(pdev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) { - dev_put(pdev); + if (!ep->l2t) goto out; - } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; @@ -2119,7 +2119,6 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, ep->rss_qid = cdev->rdev.lldi.rxq_ids[ cxgb4_port_idx(pdev) * step]; set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); - dev_put(pdev); } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, From 718609f51866e27c533dd018a6f88a2542bea2de Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 9 Oct 2024 14:47:13 -0400 Subject: [PATCH 05/32] ipv4: give an IPv4 dev to blackhole_netdev [ Upstream commit 22600596b6756b166fd052d5facb66287e6f0bad ] After commit 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries"), blackhole_netdev was introduced to invalidate dst cache entries on the TX path whenever the cache times out or is flushed. When two UDP sockets (sk1 and sk2) send messages to the same destination simultaneously, they are using the same dst cache. If the dst cache is invalidated on one path (sk2) while the other (sk1) is still transmitting, sk1 may try to use the invalid dst entry. CPU1 CPU2 udp_sendmsg(sk1) udp_sendmsg(sk2) udp_send_skb() ip_output() <--- dst timeout or flushed dst_dev_put() ip_finish_output2() ip_neigh_for_gw() This results in a scenario where ip_neigh_for_gw() returns -EINVAL because blackhole_dev lacks an in_dev, which is needed to initialize the neigh in arp_constructor(). This error is then propagated back to userspace, breaking the UDP application. The patch fixes this issue by assigning an in_dev to blackhole_dev for IPv4, similar to what was done for IPv6 in commit e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"). This ensures that even when the dst entry is invalidated with blackhole_dev, it will not fail to create the neigh entry. As devinet_init() is called ealier than blackhole_netdev_init() in system booting, it can not assign the in_dev to blackhole_dev in devinet_init(). As Paolo suggested, add a separate late_initcall() in devinet.c to ensure inet_blackhole_dev_init() is called after blackhole_netdev_init(). Fixes: 8d7017fd621d ("blackhole_netdev: use blackhole_netdev to invalidate dst entries") Signed-off-by: Xin Long Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/3000792d45ca44e16c785ebe2b092e610e5b3df1.1728499633.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/devinet.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7868f316a477..6918b3ced671 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -273,17 +273,19 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); - err = devinet_sysctl_register(in_dev); - if (err) { - in_dev->dead = 1; - neigh_parms_release(&arp_tbl, in_dev->arp_parms); - in_dev_put(in_dev); - in_dev = NULL; - goto out; + if (dev != blackhole_netdev) { + err = devinet_sysctl_register(in_dev); + if (err) { + in_dev->dead = 1; + neigh_parms_release(&arp_tbl, in_dev->arp_parms); + in_dev_put(in_dev); + in_dev = NULL; + goto out; + } + ip_mc_init_dev(in_dev); + if (dev->flags & IFF_UP) + ip_mc_up(in_dev); } - ip_mc_init_dev(in_dev); - if (dev->flags & IFF_UP) - ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); @@ -328,6 +330,19 @@ static void inetdev_destroy(struct in_device *in_dev) call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } +static int __init inet_blackhole_dev_init(void) +{ + int err = 0; + + rtnl_lock(); + if (!inetdev_init(blackhole_netdev)) + err = -ENOMEM; + rtnl_unlock(); + + return err; +} +late_initcall(inet_blackhole_dev_init); + int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; From 78aaf54ad5d7775aab92529ac2895acce8afd324 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 8 Oct 2024 00:41:36 -0700 Subject: [PATCH 06/32] RDMA/bnxt_re: Return more meaningful error [ Upstream commit 98647df0178df215b8239c5c365537283b2852a6 ] When the HWRM command fails, driver currently returns -EFAULT(Bad address). This does not look correct. Modified to return -EIO(I/O error). Fixes: cc1ec769b87c ("RDMA/bnxt_re: Fixing the Control path command and response handling") Fixes: 65288a22ddd8 ("RDMA/bnxt_re: use shadow qd while posting non blocking rcfw command") Link: https://patch.msgid.link/r/1728373302-19530-5-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 2b0c3a86293c..148f2c51a946 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -245,7 +245,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /* failed with status */ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n", cookie, opcode, evnt->status); - rc = -EFAULT; + rc = -EIO; } return rc; From df6fed0a2a1a5e57f033bca40dc316b18e0d0ce6 Mon Sep 17 00:00:00 2001 From: Bhargava Chenna Marreddy Date: Tue, 8 Oct 2024 00:41:41 -0700 Subject: [PATCH 07/32] RDMA/bnxt_re: Fix a bug while setting up Level-2 PBL pages [ Upstream commit 7988bdbbb85ac85a847baf09879edcd0f70521dc ] Avoid memory corruption while setting up Level-2 PBL pages for the non MR resources when num_pages > 256K. There will be a single PDE page address (contiguous pages in the case of > PAGE_SIZE), but, current logic assumes multiple pages, leading to invalid memory access after 256K PBL entries in the PDE. Fixes: 0c4dcd602817 ("RDMA/bnxt_re: Refactor hardware queue memory allocation") Link: https://patch.msgid.link/r/1728373302-19530-10-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Bhargava Chenna Marreddy Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/qplib_res.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 2861a2bbea6e..af23e57fc78e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -256,22 +256,9 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, dst_virt_ptr = (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr; src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr; - if (hwq_attr->type == HWQ_TYPE_MR) { - /* For MR it is expected that we supply only 1 contigous - * page i.e only 1 entry in the PDL that will contain - * all the PBLs for the user supplied memory region - */ - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[0][i] = src_phys_ptr[i] | - flag; - } else { - for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; - i++) - dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] = - src_phys_ptr[i] | - PTU_PDE_VALID; - } + for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) + dst_virt_ptr[0][i] = src_phys_ptr[i] | flag; + /* Alloc or init PTEs */ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2], hwq_attr->sginfo); From 2c7dd3ca6be9065e6124a0834475a8ff9569655b Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Mon, 7 Oct 2024 01:01:49 -0400 Subject: [PATCH 08/32] drm/msm/dsi: fix 32-bit signed integer extension in pclk_rate calculation [ Upstream commit 358b762400bd94db2a14a72dfcef74c7da6bd845 ] When (mode->clock * 1000) is larger than (1<<31), int to unsigned long conversion will sign extend the int to 64 bits and the pclk_rate value will be incorrect. Fix this by making the result of the multiplication unsigned. Note that above (1<<32) would still be broken and require more changes, but its unlikely anyone will need that anytime soon. Fixes: c4d8cfe516dc ("drm/msm/dsi: add implementation for helper functions") Signed-off-by: Jonathan Marek Reviewed-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/618434/ Link: https://lore.kernel.org/r/20241007050157.26855-2-jonathan@marek.ca Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/dsi/dsi_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index fb7792ca39e2..b69099b533bf 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -685,7 +685,7 @@ static u32 dsi_get_pclk_rate(struct msm_dsi_host *msm_host, bool is_dual_dsi) struct drm_display_mode *mode = msm_host->mode; u32 pclk_rate; - pclk_rate = mode->clock * 1000; + pclk_rate = mode->clock * 1000u; /* * For dual DSI mode, the current DRM mode has the complete width of the From 56dbb74b6a4b09f37be654e21e89157e7b24e083 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 11 Oct 2024 17:16:37 +0200 Subject: [PATCH 09/32] macsec: don't increment counters for an unrelated SA [ Upstream commit cf58aefb1332db322060cad4a330d5f9292b0f41 ] On RX, we shouldn't be incrementing the stats for an arbitrary SA in case the actual SA hasn't been set up. Those counters are intended to track packets for their respective AN when the SA isn't currently configured. Due to the way MACsec is implemented, we don't keep counters unless the SA is configured, so we can't track those packets, and those counters will remain at 0. The RXSC's stats keeps track of those packets without telling us which AN they belonged to. We could add counters for non-existent SAs, and then find a way to integrate them in the dump to userspace, but I don't think it's worth the effort. Fixes: 91ec9bd57f35 ("macsec: Fix traffic counters/statistics") Reported-by: Paolo Abeni Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/f5ac92aaa5b89343232615f4c03f9f95042c6aa0.1728657709.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 83b02dc7dfd2..5e30fd017b3a 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -159,19 +159,6 @@ static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr) return sa; } -static struct macsec_rx_sa *macsec_active_rxsa_get(struct macsec_rx_sc *rx_sc) -{ - struct macsec_rx_sa *sa = NULL; - int an; - - for (an = 0; an < MACSEC_NUM_AN; an++) { - sa = macsec_rxsa_get(rx_sc->sa[an]); - if (sa) - break; - } - return sa; -} - static void free_rx_sc_rcu(struct rcu_head *head) { struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head); @@ -1196,15 +1183,12 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ - struct macsec_rx_sa *active_rx_sa = macsec_active_rxsa_get(rx_sc); if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotUsingSA++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); - if (active_rx_sa) - this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA); goto drop_nosa; } @@ -1214,8 +1198,6 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsUnusedSA++; u64_stats_update_end(&rxsc_stats->syncp); - if (active_rx_sa) - this_cpu_inc(active_rx_sa->stats->InPktsUnusedSA); goto deliver; } From aacf6e28aedaf5a4f6f9d5acd94349ec18fab8ab Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Sat, 12 Oct 2024 19:04:34 +0800 Subject: [PATCH 10/32] net: ethernet: aeroflex: fix potential memory leak in greth_start_xmit_gbit() [ Upstream commit cf57b5d7a2aad456719152ecd12007fe031628a3 ] The greth_start_xmit_gbit() returns NETDEV_TX_OK without freeing skb in case of skb->len being too long, add dev_kfree_skb() to fix it. Fixes: d4c41139df6e ("net: Add Aeroflex Gaisler 10/100/1G Ethernet MAC driver") Signed-off-by: Wang Hai Reviewed-by: Gerhard Engleder Link: https://patch.msgid.link/20241012110434.49265-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/aeroflex/greth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 0d56cb4f5dd9..c84b9acc319f 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -484,7 +484,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb->len > MAX_FRAME_SIZE)) { dev->stats.tx_errors++; - goto out; + goto len_error; } /* Save skb pointer. */ @@ -575,6 +575,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) map_error: if (net_ratelimit()) dev_warn(greth->dev, "Could not create TX DMA mapping\n"); +len_error: dev_kfree_skb(skb); out: return err; From f48eaf4e88c378a3e8dfb9f69e28a8eac788e3d7 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 14 Oct 2024 19:53:21 +0800 Subject: [PATCH 11/32] net/smc: Fix searching in list of known pnetids in smc_pnet_add_pnetid [ Upstream commit 82ac39ebd6db0c9f7a97a934bda1e3e101a9d201 ] pnetid of pi (not newly allocated pe) should be compared Fixes: e888a2e8337c ("net/smc: introduce list of pnetids for Ethernet devices") Reviewed-by: D. Wythe Reviewed-by: Wen Gu Signed-off-by: Li RongQing Reviewed-by: Simon Horman Reviewed-by: Gerd Bayer Link: https://patch.msgid.link/20241014115321.33234-1-lirongqing@baidu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index ed9cfa11b589..7824b32cdb66 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -744,7 +744,7 @@ static int smc_pnet_add_pnetid(struct net *net, u8 *pnetid) write_lock(&sn->pnetids_ndev.lock); list_for_each_entry(pi, &sn->pnetids_ndev.list, list) { - if (smc_pnet_match(pnetid, pe->pnetid)) { + if (smc_pnet_match(pnetid, pi->pnetid)) { refcount_inc(&pi->refcnt); kfree(pe); goto unlock; From b6321146773dcbbc372a54dbada67e0b50e0a25c Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Mon, 14 Oct 2024 22:51:15 +0800 Subject: [PATCH 12/32] net: systemport: fix potential memory leak in bcm_sysport_xmit() [ Upstream commit c401ed1c709948e57945485088413e1bb5e94bd1 ] The bcm_sysport_xmit() returns NETDEV_TX_OK without freeing skb in case of dma_map_single() fails, add dev_kfree_skb() to fix it. Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver") Signed-off-by: Wang Hai Link: https://patch.msgid.link/20241014145115.44977-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bcmsysport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 82d369d9f7a5..ae1cf2ead9a9 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1343,6 +1343,7 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, netif_err(priv, tx_err, dev, "DMA map failed at %p (len=%d)\n", skb->data, skb_len); ret = NETDEV_TX_OK; + dev_kfree_skb_any(skb); goto out; } From 4af714e82379984974a1b61a5629cdb302673dda Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Oct 2024 17:12:17 +0000 Subject: [PATCH 13/32] genetlink: hold RCU in genlmsg_mcast() [ Upstream commit 56440d7ec28d60f8da3bfa09062b3368ff9b16db ] While running net selftests with CONFIG_PROVE_RCU_LIST=y I saw one lockdep splat [1]. genlmsg_mcast() uses for_each_net_rcu(), and must therefore hold RCU. Instead of letting all callers guard genlmsg_multicast_allns() with a rcu_read_lock()/rcu_read_unlock() pair, do it in genlmsg_mcast(). This also means the @flags parameter is useless, we need to always use GFP_ATOMIC. [1] [10882.424136] ============================= [10882.424166] WARNING: suspicious RCU usage [10882.424309] 6.12.0-rc2-virtme #1156 Not tainted [10882.424400] ----------------------------- [10882.424423] net/netlink/genetlink.c:1940 RCU-list traversed in non-reader section!! [10882.424469] other info that might help us debug this: [10882.424500] rcu_scheduler_active = 2, debug_locks = 1 [10882.424744] 2 locks held by ip/15677: [10882.424791] #0: ffffffffb6b491b0 (cb_lock){++++}-{3:3}, at: genl_rcv (net/netlink/genetlink.c:1219) [10882.426334] #1: ffffffffb6b49248 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg (net/netlink/genetlink.c:61 net/netlink/genetlink.c:57 net/netlink/genetlink.c:1209) [10882.426465] stack backtrace: [10882.426805] CPU: 14 UID: 0 PID: 15677 Comm: ip Not tainted 6.12.0-rc2-virtme #1156 [10882.426919] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [10882.427046] Call Trace: [10882.427131] [10882.427244] dump_stack_lvl (lib/dump_stack.c:123) [10882.427335] lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822) [10882.427387] genlmsg_multicast_allns (net/netlink/genetlink.c:1940 (discriminator 7) net/netlink/genetlink.c:1977 (discriminator 7)) [10882.427436] l2tp_tunnel_notify.constprop.0 (net/l2tp/l2tp_netlink.c:119) l2tp_netlink [10882.427683] l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:253) l2tp_netlink [10882.427748] genl_family_rcv_msg_doit (net/netlink/genetlink.c:1115) [10882.427834] genl_rcv_msg (net/netlink/genetlink.c:1195 net/netlink/genetlink.c:1210) [10882.427877] ? __pfx_l2tp_nl_cmd_tunnel_create (net/l2tp/l2tp_netlink.c:186) l2tp_netlink [10882.427927] ? __pfx_genl_rcv_msg (net/netlink/genetlink.c:1201) [10882.427959] netlink_rcv_skb (net/netlink/af_netlink.c:2551) [10882.428069] genl_rcv (net/netlink/genetlink.c:1220) [10882.428095] netlink_unicast (net/netlink/af_netlink.c:1332 net/netlink/af_netlink.c:1357) [10882.428140] netlink_sendmsg (net/netlink/af_netlink.c:1901) [10882.428210] ____sys_sendmsg (net/socket.c:729 (discriminator 1) net/socket.c:744 (discriminator 1) net/socket.c:2607 (discriminator 1)) Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners") Signed-off-by: Eric Dumazet Cc: James Chapman Cc: Tom Parkin Cc: Johannes Berg Link: https://patch.msgid.link/20241011171217.3166614-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/target/target_core_user.c | 2 +- include/net/genetlink.h | 3 +-- net/l2tp/l2tp_netlink.c | 4 ++-- net/netlink/genetlink.c | 28 ++++++++++++++-------------- net/wireless/nl80211.c | 8 ++------ 5 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c283e45ac300..2ac973291b1f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1988,7 +1988,7 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev, } ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0, - TCMU_MCGRP_CONFIG, GFP_KERNEL); + TCMU_MCGRP_CONFIG); /* Wait during an add as the listener may not be up yet */ if (ret == 0 || diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 3057c8e6dcfe..e00f617d4b6c 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -335,13 +335,12 @@ static inline int genlmsg_multicast(const struct genl_family *family, * @skb: netlink message as socket buffer * @portid: own netlink portid to avoid sending to yourself * @group: offset of multicast group in groups array - * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, - unsigned int group, gfp_t flags); + unsigned int group); /** * genlmsg_unicast - unicast a netlink message diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 96eb91be9238..f34ca225c219 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -115,7 +115,7 @@ static int l2tp_tunnel_notify(struct genl_family *family, NLM_F_ACK, tunnel, cmd); if (ret >= 0) { - ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; @@ -143,7 +143,7 @@ static int l2tp_session_notify(struct genl_family *family, NLM_F_ACK, session, cmd); if (ret >= 0) { - ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + ret = genlmsg_multicast_allns(family, msg, 0, 0); /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index e9035de65546..e085ceec96a4 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1097,15 +1097,11 @@ static int genl_ctrl_event(int event, const struct genl_family *family, if (IS_ERR(msg)) return PTR_ERR(msg); - if (!family->netnsok) { + if (!family->netnsok) genlmsg_multicast_netns(&genl_ctrl, &init_net, msg, 0, 0, GFP_KERNEL); - } else { - rcu_read_lock(); - genlmsg_multicast_allns(&genl_ctrl, msg, 0, - 0, GFP_ATOMIC); - rcu_read_unlock(); - } + else + genlmsg_multicast_allns(&genl_ctrl, msg, 0, 0); return 0; } @@ -1449,23 +1445,23 @@ static int __init genl_init(void) core_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, - gfp_t flags) +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group) { struct sk_buff *tmp; struct net *net, *prev = NULL; bool delivered = false; int err; + rcu_read_lock(); for_each_net_rcu(net) { if (prev) { - tmp = skb_clone(skb, flags); + tmp = skb_clone(skb, GFP_ATOMIC); if (!tmp) { err = -ENOMEM; goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - portid, group, flags); + portid, group, GFP_ATOMIC); if (!err) delivered = true; else if (err != -ESRCH) @@ -1474,26 +1470,30 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, prev = net; } + err = nlmsg_multicast(prev->genl_sock, skb, portid, group, GFP_ATOMIC); + + rcu_read_unlock(); - err = nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); if (!err) delivered = true; else if (err != -ESRCH) return err; return delivered ? 0 : -ESRCH; error: + rcu_read_unlock(); + kfree_skb(skb); return err; } int genlmsg_multicast_allns(const struct genl_family *family, struct sk_buff *skb, u32 portid, - unsigned int group, gfp_t flags) + unsigned int group) { if (WARN_ON_ONCE(group >= family->n_mcgrps)) return -EINVAL; group = family->mcgrp_offset + group; - return genlmsg_mcast(skb, portid, group, flags); + return genlmsg_mcast(skb, portid, group); } EXPORT_SYMBOL(genlmsg_multicast_allns); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a989231198fe..93b89f835e38 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -15874,10 +15874,8 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, genlmsg_end(msg, hdr); - rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, - NL80211_MCGRP_REGULATORY, GFP_ATOMIC); - rcu_read_unlock(); + NL80211_MCGRP_REGULATORY); return; @@ -16385,10 +16383,8 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, genlmsg_end(msg, hdr); - rcu_read_lock(); genlmsg_multicast_allns(&nl80211_fam, msg, 0, - NL80211_MCGRP_REGULATORY, GFP_ATOMIC); - rcu_read_unlock(); + NL80211_MCGRP_REGULATORY); return; From 8c1e6717f60d31f8af3937c23c4f1498529584e1 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 11 Oct 2024 19:34:44 +0800 Subject: [PATCH 14/32] scsi: target: core: Fix null-ptr-deref in target_alloc_device() [ Upstream commit fca6caeb4a61d240f031914413fcc69534f6dc03 ] There is a null-ptr-deref issue reported by KASAN: BUG: KASAN: null-ptr-deref in target_alloc_device+0xbc4/0xbe0 [target_core_mod] ... kasan_report+0xb9/0xf0 target_alloc_device+0xbc4/0xbe0 [target_core_mod] core_dev_setup_virtual_lun0+0xef/0x1f0 [target_core_mod] target_core_init_configfs+0x205/0x420 [target_core_mod] do_one_initcall+0xdd/0x4e0 ... entry_SYSCALL_64_after_hwframe+0x76/0x7e In target_alloc_device(), if allocing memory for dev queues fails, then dev will be freed by dev->transport->free_device(), but dev->transport is not initialized at that time, which will lead to a null pointer reference problem. Fixing this bug by freeing dev with hba->backend->ops->free_device(). Fixes: 1526d9f10c61 ("scsi: target: Make state_list per CPU") Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20241011113444.40749-1-wanghai38@huawei.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/target/target_core_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index daa4d06ce233..50135f8df1b4 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -727,7 +727,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL); if (!dev->queues) { - dev->transport->free_device(dev); + hba->backend->ops->free_device(dev); return NULL; } From ed31aba8ce93472d9e16f5cff844ae7c94e9601d Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 15 Oct 2024 19:04:04 -0300 Subject: [PATCH 15/32] smb: client: fix OOBs when building SMB2_IOCTL request [ Upstream commit 1ab60323c5201bef25f2a3dc0ccc404d9aca77f1 ] When using encryption, either enforced by the server or when using 'seal' mount option, the client will squash all compound request buffers down for encryption into a single iov in smb2_set_next_command(). SMB2_ioctl_init() allocates a small buffer (448 bytes) to hold the SMB2_IOCTL request in the first iov, and if the user passes an input buffer that is greater than 328 bytes, smb2_set_next_command() will end up writing off the end of @rqst->iov[0].iov_base as shown below: mount.cifs //srv/share /mnt -o ...,seal ln -s $(perl -e "print('a')for 1..1024") /mnt/link BUG: KASAN: slab-out-of-bounds in smb2_set_next_command.cold+0x1d6/0x24c [cifs] Write of size 4116 at addr ffff8881148fcab8 by task ln/859 CPU: 1 UID: 0 PID: 859 Comm: ln Not tainted 6.12.0-rc3 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x80 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] print_report+0x156/0x4d9 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] ? __virt_addr_valid+0x145/0x310 ? __phys_addr+0x46/0x90 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] kasan_report+0xda/0x110 ? smb2_set_next_command.cold+0x1d6/0x24c [cifs] kasan_check_range+0x10f/0x1f0 __asan_memcpy+0x3c/0x60 smb2_set_next_command.cold+0x1d6/0x24c [cifs] smb2_compound_op+0x238c/0x3840 [cifs] ? kasan_save_track+0x14/0x30 ? kasan_save_free_info+0x3b/0x70 ? vfs_symlink+0x1a1/0x2c0 ? do_symlinkat+0x108/0x1c0 ? __pfx_smb2_compound_op+0x10/0x10 [cifs] ? kmem_cache_free+0x118/0x3e0 ? cifs_get_writable_path+0xeb/0x1a0 [cifs] smb2_get_reparse_inode+0x423/0x540 [cifs] ? __pfx_smb2_get_reparse_inode+0x10/0x10 [cifs] ? rcu_is_watching+0x20/0x50 ? __kmalloc_noprof+0x37c/0x480 ? smb2_create_reparse_symlink+0x257/0x490 [cifs] ? smb2_create_reparse_symlink+0x38f/0x490 [cifs] smb2_create_reparse_symlink+0x38f/0x490 [cifs] ? __pfx_smb2_create_reparse_symlink+0x10/0x10 [cifs] ? find_held_lock+0x8a/0xa0 ? hlock_class+0x32/0xb0 ? __build_path_from_dentry_optional_prefix+0x19d/0x2e0 [cifs] cifs_symlink+0x24f/0x960 [cifs] ? __pfx_make_vfsuid+0x10/0x10 ? __pfx_cifs_symlink+0x10/0x10 [cifs] ? make_vfsgid+0x6b/0xc0 ? generic_permission+0x96/0x2d0 vfs_symlink+0x1a1/0x2c0 do_symlinkat+0x108/0x1c0 ? __pfx_do_symlinkat+0x10/0x10 ? strncpy_from_user+0xaa/0x160 __x64_sys_symlinkat+0xb9/0xf0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f08d75c13bb Reported-by: David Howells Fixes: e77fe73c7e38 ("cifs: we can not use small padding iovs together with encryption") Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index aa3211d8cce3..03651cc6b7a5 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2961,6 +2961,15 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, return rc; if (indatalen) { + unsigned int len; + + if (WARN_ON_ONCE(smb3_encryption_required(tcon) && + (check_add_overflow(total_len - 1, + ALIGN(indatalen, 8), &len) || + len > MAX_CIFS_SMALL_BUFFER_SIZE))) { + cifs_small_buf_release(req); + return -EIO; + } /* * indatalen is usually small at a couple of bytes max, so * just allocate through generic pool From 6af43ec3bf40f8b428d9134ffa7a291aecd60da8 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 4 Oct 2024 09:37:38 -0300 Subject: [PATCH 16/32] usb: typec: altmode should keep reference to parent [ Upstream commit befab3a278c59db0cc88c8799638064f6d3fd6f8 ] The altmode device release refers to its parent device, but without keeping a reference to it. When registering the altmode, get a reference to the parent and put it in the release function. Before this fix, when using CONFIG_DEBUG_KOBJECT_RELEASE, we see issues like this: [ 43.572860] kobject: 'port0.0' (ffff8880057ba008): kobject_release, parent 0000000000000000 (delayed 3000) [ 43.573532] kobject: 'port0.1' (ffff8880057bd008): kobject_release, parent 0000000000000000 (delayed 1000) [ 43.574407] kobject: 'port0' (ffff8880057b9008): kobject_release, parent 0000000000000000 (delayed 3000) [ 43.575059] kobject: 'port1.0' (ffff8880057ca008): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.575908] kobject: 'port1.1' (ffff8880057c9008): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.576908] kobject: 'typec' (ffff8880062dbc00): kobject_release, parent 0000000000000000 (delayed 4000) [ 43.577769] kobject: 'port1' (ffff8880057bf008): kobject_release, parent 0000000000000000 (delayed 3000) [ 46.612867] ================================================================== [ 46.613402] BUG: KASAN: slab-use-after-free in typec_altmode_release+0x38/0x129 [ 46.614003] Read of size 8 at addr ffff8880057b9118 by task kworker/2:1/48 [ 46.614538] [ 46.614668] CPU: 2 UID: 0 PID: 48 Comm: kworker/2:1 Not tainted 6.12.0-rc1-00138-gedbae730ad31 #535 [ 46.615391] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 46.616042] Workqueue: events kobject_delayed_cleanup [ 46.616446] Call Trace: [ 46.616648] [ 46.616820] dump_stack_lvl+0x5b/0x7c [ 46.617112] ? typec_altmode_release+0x38/0x129 [ 46.617470] print_report+0x14c/0x49e [ 46.617769] ? rcu_read_unlock_sched+0x56/0x69 [ 46.618117] ? __virt_addr_valid+0x19a/0x1ab [ 46.618456] ? kmem_cache_debug_flags+0xc/0x1d [ 46.618807] ? typec_altmode_release+0x38/0x129 [ 46.619161] kasan_report+0x8d/0xb4 [ 46.619447] ? typec_altmode_release+0x38/0x129 [ 46.619809] ? process_scheduled_works+0x3cb/0x85f [ 46.620185] typec_altmode_release+0x38/0x129 [ 46.620537] ? process_scheduled_works+0x3cb/0x85f [ 46.620907] device_release+0xaf/0xf2 [ 46.621206] kobject_delayed_cleanup+0x13b/0x17a [ 46.621584] process_scheduled_works+0x4f6/0x85f [ 46.621955] ? __pfx_process_scheduled_works+0x10/0x10 [ 46.622353] ? hlock_class+0x31/0x9a [ 46.622647] ? lock_acquired+0x361/0x3c3 [ 46.622956] ? move_linked_works+0x46/0x7d [ 46.623277] worker_thread+0x1ce/0x291 [ 46.623582] ? __kthread_parkme+0xc8/0xdf [ 46.623900] ? __pfx_worker_thread+0x10/0x10 [ 46.624236] kthread+0x17e/0x190 [ 46.624501] ? kthread+0xfb/0x190 [ 46.624756] ? __pfx_kthread+0x10/0x10 [ 46.625015] ret_from_fork+0x20/0x40 [ 46.625268] ? __pfx_kthread+0x10/0x10 [ 46.625532] ret_from_fork_asm+0x1a/0x30 [ 46.625805] [ 46.625953] [ 46.626056] Allocated by task 678: [ 46.626287] kasan_save_stack+0x24/0x44 [ 46.626555] kasan_save_track+0x14/0x2d [ 46.626811] __kasan_kmalloc+0x3f/0x4d [ 46.627049] __kmalloc_noprof+0x1bf/0x1f0 [ 46.627362] typec_register_port+0x23/0x491 [ 46.627698] cros_typec_probe+0x634/0xbb6 [ 46.628026] platform_probe+0x47/0x8c [ 46.628311] really_probe+0x20a/0x47d [ 46.628605] device_driver_attach+0x39/0x72 [ 46.628940] bind_store+0x87/0xd7 [ 46.629213] kernfs_fop_write_iter+0x1aa/0x218 [ 46.629574] vfs_write+0x1d6/0x29b [ 46.629856] ksys_write+0xcd/0x13b [ 46.630128] do_syscall_64+0xd4/0x139 [ 46.630420] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 46.630820] [ 46.630946] Freed by task 48: [ 46.631182] kasan_save_stack+0x24/0x44 [ 46.631493] kasan_save_track+0x14/0x2d [ 46.631799] kasan_save_free_info+0x3f/0x4d [ 46.632144] __kasan_slab_free+0x37/0x45 [ 46.632474] kfree+0x1d4/0x252 [ 46.632725] device_release+0xaf/0xf2 [ 46.633017] kobject_delayed_cleanup+0x13b/0x17a [ 46.633388] process_scheduled_works+0x4f6/0x85f [ 46.633764] worker_thread+0x1ce/0x291 [ 46.634065] kthread+0x17e/0x190 [ 46.634324] ret_from_fork+0x20/0x40 [ 46.634621] ret_from_fork_asm+0x1a/0x30 Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Heikki Krogerus Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20241004123738.2964524-1-cascardo@igalia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/typec/class.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 7fa95e701244..dec83edb09de 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -430,6 +430,7 @@ static void typec_altmode_release(struct device *dev) typec_altmode_put_partner(alt); altmode_id_remove(alt->adev.dev.parent, alt->id); + put_device(alt->adev.dev.parent); kfree(alt); } @@ -479,6 +480,8 @@ typec_register_altmode(struct device *parent, alt->adev.dev.type = &typec_altmode_dev_type; dev_set_name(&alt->adev.dev, "%s.%u", dev_name(parent), id); + get_device(alt->adev.dev.parent); + /* Link partners and plugs with the ports */ if (!is_port) typec_altmode_set_partner(alt); From 5c345c47e88497ed5887bd921bb2a74f38cbae59 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Oct 2024 17:52:39 +0200 Subject: [PATCH 17/32] s390: Initialize psw mask in perf_arch_fetch_caller_regs() [ Upstream commit 223e7fb979fa06934f1595b6ad0ae1d4ead1147f ] Also initialize regs->psw.mask in perf_arch_fetch_caller_regs(). This way user_mode(regs) will return false, like it should. It looks like all current users initialize regs to zero, so that this doesn't fix a bug currently. However it is better to not rely on callers to do this. Fixes: 914d52e46490 ("s390: implement perf_arch_fetch_caller_regs") Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/include/asm/perf_event.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index b9da71632827..ea340b901839 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -75,6 +75,7 @@ struct perf_sf_sde_regs { #define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE) #define perf_arch_fetch_caller_regs(regs, __ip) do { \ + (regs)->psw.mask = 0; \ (regs)->psw.addr = (__ip); \ (regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \ offsetof(struct stack_frame, back_chain); \ From 6c151aeb6dc414db8f4daf51be072e802fae6667 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 14 Oct 2024 17:07:08 +0800 Subject: [PATCH 18/32] Bluetooth: bnep: fix wild-memory-access in proto_unregister [ Upstream commit 64a90991ba8d4e32e3173ddd83d0b24167a5668c ] There's issue as follows: KASAN: maybe wild-memory-access in range [0xdead...108-0xdead...10f] CPU: 3 UID: 0 PID: 2805 Comm: rmmod Tainted: G W RIP: 0010:proto_unregister+0xee/0x400 Call Trace: __do_sys_delete_module+0x318/0x580 do_syscall_64+0xc1/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f As bnep_init() ignore bnep_sock_init()'s return value, and bnep_sock_init() will cleanup all resource. Then when remove bnep module will call bnep_sock_cleanup() to cleanup sock's resource. To solve above issue just return bnep_sock_init()'s return value in bnep_exit(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ye Bin Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/bnep/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index f74990427296..0eaa47ae6e99 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -745,8 +745,7 @@ static int __init bnep_init(void) if (flt[0]) BT_INFO("BNEP filters: %s", flt); - bnep_sock_init(); - return 0; + return bnep_sock_init(); } static void __exit bnep_exit(void) From 7f1ef59185d23450d77e94671c0abb03f8978c01 Mon Sep 17 00:00:00 2001 From: junhua huang Date: Fri, 2 Dec 2022 15:11:10 +0800 Subject: [PATCH 19/32] arm64:uprobe fix the uprobe SWBP_INSN in big-endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 60f07e22a73d318cddaafa5ef41a10476807cc07 ] We use uprobe in aarch64_be, which we found the tracee task would exit due to SIGILL when we enable the uprobe trace. We can see the replace inst from uprobe is not correct in aarch big-endian. As in Armv8-A, instruction fetches are always treated as little-endian, we should treat the UPROBE_SWBP_INSN as little-endian。 The test case is as following。 bash-4.4# ./mqueue_test_aarchbe 1 1 2 1 10 > /dev/null & bash-4.4# cd /sys/kernel/debug/tracing/ bash-4.4# echo 'p:test /mqueue_test_aarchbe:0xc30 %x0 %x1' > uprobe_events bash-4.4# echo 1 > events/uprobes/enable bash-4.4# bash-4.4# ps PID TTY TIME CMD 140 ? 00:00:01 bash 237 ? 00:00:00 ps [1]+ Illegal instruction ./mqueue_test_aarchbe 1 1 2 1 100 > /dev/null which we debug use gdb as following: bash-4.4# gdb attach 155 (gdb) disassemble send Dump of assembler code for function send: 0x0000000000400c30 <+0>: .inst 0xa00020d4 ; undefined 0x0000000000400c34 <+4>: mov x29, sp 0x0000000000400c38 <+8>: str w0, [sp, #28] 0x0000000000400c3c <+12>: strb w1, [sp, #27] 0x0000000000400c40 <+16>: str xzr, [sp, #40] 0x0000000000400c44 <+20>: str xzr, [sp, #48] 0x0000000000400c48 <+24>: add x0, sp, #0x1b 0x0000000000400c4c <+28>: mov w3, #0x0 // #0 0x0000000000400c50 <+32>: mov x2, #0x1 // #1 0x0000000000400c54 <+36>: mov x1, x0 0x0000000000400c58 <+40>: ldr w0, [sp, #28] 0x0000000000400c5c <+44>: bl 0x405e10 0x0000000000400c60 <+48>: str w0, [sp, #60] 0x0000000000400c64 <+52>: ldr w0, [sp, #60] 0x0000000000400c68 <+56>: ldp x29, x30, [sp], #64 0x0000000000400c6c <+60>: ret End of assembler dump. (gdb) info b No breakpoints or watchpoints. (gdb) c Continuing. Program received signal SIGILL, Illegal instruction. 0x0000000000400c30 in send () (gdb) x/10x 0x400c30 0x400c30 : 0xd42000a0 0xfd030091 0xe01f00b9 0xe16f0039 0x400c40 : 0xff1700f9 0xff1b00f9 0xe06f0091 0x03008052 0x400c50 : 0x220080d2 0xe10300aa (gdb) disassemble 0x400c30 Dump of assembler code for function send: => 0x0000000000400c30 <+0>: .inst 0xa00020d4 ; undefined 0x0000000000400c34 <+4>: mov x29, sp 0x0000000000400c38 <+8>: str w0, [sp, #28] 0x0000000000400c3c <+12>: strb w1, [sp, #27] 0x0000000000400c40 <+16>: str xzr, [sp, #40] Signed-off-by: junhua huang Link: https://lore.kernel.org/r/202212021511106844809@zte.com.cn Signed-off-by: Will Deacon Stable-dep-of: 13f8f1e05f1d ("arm64: probes: Fix uprobes for big-endian kernels") Signed-off-by: Sasha Levin --- arch/arm64/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 315eef654e39..ba4bff5ca674 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -12,7 +12,7 @@ #define MAX_UINSN_BYTES AARCH64_INSN_SIZE -#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES +#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE #define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES From cf9ddf9ed94c15564a05bbf6e9f18dffa0c7df80 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH 20/32] arm64: probes: Fix uprobes for big-endian kernels [ Upstream commit 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 ] The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include | #include | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 : | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable@vger.kernel.org Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/include/asm/uprobes.h | 8 +++----- arch/arm64/kernel/probes/uprobes.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index ba4bff5ca674..98f29a43bfe8 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include #include -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef u32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..8a02c549e57f 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler) From 511ca935092a5603ebebb0980e1a65a1e892a0f0 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 26 Nov 2021 17:45:47 +0100 Subject: [PATCH 21/32] KVM: s390: gaccess: Refactor gpa and length calculation [ Upstream commit 416e7f0c9d613bf84e182eba9547ae8f9f5bfa4c ] Improve readability by renaming the length variable and not calculating the offset manually. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Message-Id: <20211126164549.7046-2-scgl@linux.ibm.com> Signed-off-by: Janosch Frank Stable-dep-of: e8061f06185b ("KVM: s390: gaccess: Check if guest address is in memslot") Signed-off-by: Sasha Levin --- arch/s390/kvm/gaccess.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b9f85b2dc053..9f80d95a4377 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -831,8 +831,9 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; - unsigned long _len, nr_pages, gpa, idx; + unsigned long nr_pages, gpa, idx; unsigned long pages_array[2]; + unsigned int fragment_len; unsigned long *pages; int need_ipte_lock; union asce asce; @@ -855,15 +856,15 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, ipte_lock(vcpu); rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { - gpa = *(pages + idx) + (ga & ~PAGE_MASK); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + gpa = pages[idx] + offset_in_page(ga); + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); if (mode == GACC_STORE) - rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); + rc = kvm_write_guest(vcpu->kvm, gpa, data, fragment_len); else - rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); - len -= _len; - ga += _len; - data += _len; + rc = kvm_read_guest(vcpu->kvm, gpa, data, fragment_len); + len -= fragment_len; + ga += fragment_len; + data += fragment_len; } if (need_ipte_lock) ipte_unlock(vcpu); @@ -875,19 +876,20 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode) { - unsigned long _len, gpa; + unsigned int fragment_len; + unsigned long gpa; int rc = 0; while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); if (mode) - rc = write_guest_abs(vcpu, gpa, data, _len); + rc = write_guest_abs(vcpu, gpa, data, fragment_len); else - rc = read_guest_abs(vcpu, gpa, data, _len); - len -= _len; - gra += _len; - data += _len; + rc = read_guest_abs(vcpu, gpa, data, fragment_len); + len -= fragment_len; + gra += fragment_len; + data += fragment_len; } return rc; } From 472088ffb1cce6f3ea4317047ae94c32c554d661 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 26 Nov 2021 17:45:48 +0100 Subject: [PATCH 22/32] KVM: s390: gaccess: Refactor access address range check [ Upstream commit 7faa543df19bf62d4583a64d3902705747f2ad29 ] Do not round down the first address to the page boundary, just translate it normally, which gives the value we care about in the first place. Given this, translating a single address is just the special case of translating a range spanning a single page. Make the output optional, so the function can be used to just check a range. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Message-Id: <20211126164549.7046-3-scgl@linux.ibm.com> Signed-off-by: Janosch Frank Stable-dep-of: e8061f06185b ("KVM: s390: gaccess: Check if guest address is in memslot") Signed-off-by: Sasha Levin --- arch/s390/kvm/gaccess.c | 122 +++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 53 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9f80d95a4377..9b9bfc333e62 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -794,35 +794,74 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, - unsigned long *pages, unsigned long nr_pages, - const union asce asce, enum gacc_mode mode) +/** + * guest_range_to_gpas() - Calculate guest physical addresses of page fragments + * covering a logical range + * @vcpu: virtual cpu + * @ga: guest address, start of range + * @ar: access register + * @gpas: output argument, may be NULL + * @len: length of range in bytes + * @asce: address-space-control element to use for translation + * @mode: access mode + * + * Translate a logical range to a series of guest absolute addresses, + * such that the concatenation of page fragments starting at each gpa make up + * the whole range. + * The translation is performed as if done by the cpu for the given @asce, @ar, + * @mode and state of the @vcpu. + * If the translation causes an exception, its program interruption code is + * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified + * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject + * a correct exception into the guest. + * The resulting gpas are stored into @gpas, unless it is NULL. + * + * Note: All fragments except the first one start at the beginning of a page. + * When deriving the boundaries of a fragment from a gpa, all but the last + * fragment end at the end of the page. + * + * Return: + * * 0 - success + * * <0 - translation could not be performed, for example if guest + * memory could not be accessed + * * >0 - an access exception occurred. In this case the returned value + * is the program interruption code and the contents of pgm may + * be used to inject an exception into the guest. + */ +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + unsigned long *gpas, unsigned long len, + const union asce asce, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned int offset = offset_in_page(ga); + unsigned int fragment_len; int lap_enabled, rc = 0; enum prot_type prot; + unsigned long gpa; lap_enabled = low_address_protection_enabled(vcpu, asce); - while (nr_pages) { + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); ga = kvm_s390_logical_to_effective(vcpu, ga); if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, PROT_TYPE_LA); - ga &= PAGE_MASK; if (psw_bits(*psw).dat) { - rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); + rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot); if (rc < 0) return rc; } else { - *pages = kvm_s390_real_to_abs(vcpu, ga); - if (kvm_is_error_gpa(vcpu->kvm, *pages)) + gpa = kvm_s390_real_to_abs(vcpu, ga); + if (kvm_is_error_gpa(vcpu->kvm, gpa)) rc = PGM_ADDRESSING; } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); - ga += PAGE_SIZE; - pages++; - nr_pages--; + if (gpas) + *gpas++ = gpa; + offset = 0; + ga += fragment_len; + len -= fragment_len; } return 0; } @@ -831,10 +870,10 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; - unsigned long nr_pages, gpa, idx; - unsigned long pages_array[2]; + unsigned long nr_pages, idx; + unsigned long gpa_array[2]; unsigned int fragment_len; - unsigned long *pages; + unsigned long *gpas; int need_ipte_lock; union asce asce; int rc; @@ -846,30 +885,28 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; - pages = pages_array; - if (nr_pages > ARRAY_SIZE(pages_array)) - pages = vmalloc(array_size(nr_pages, sizeof(unsigned long))); - if (!pages) + gpas = gpa_array; + if (nr_pages > ARRAY_SIZE(gpa_array)) + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); + if (!gpas) return -ENOMEM; need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { - gpa = pages[idx] + offset_in_page(ga); - fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); + fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); if (mode == GACC_STORE) - rc = kvm_write_guest(vcpu->kvm, gpa, data, fragment_len); + rc = kvm_write_guest(vcpu->kvm, gpas[idx], data, fragment_len); else - rc = kvm_read_guest(vcpu->kvm, gpa, data, fragment_len); + rc = kvm_read_guest(vcpu->kvm, gpas[idx], data, fragment_len); len -= fragment_len; - ga += fragment_len; data += fragment_len; } if (need_ipte_lock) ipte_unlock(vcpu); - if (nr_pages > ARRAY_SIZE(pages_array)) - vfree(pages); + if (nr_pages > ARRAY_SIZE(gpa_array)) + vfree(gpas); return rc; } @@ -906,8 +943,6 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long *gpa, enum gacc_mode mode) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - enum prot_type prot; union asce asce; int rc; @@ -915,23 +950,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; - if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) - return trans_exc(vcpu, PGM_PROTECTION, gva, 0, - mode, PROT_TYPE_LA); - } - - if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); - if (rc > 0) - return trans_exc(vcpu, rc, gva, 0, mode, prot); - } else { - *gpa = kvm_s390_real_to_abs(vcpu, gva); - if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); - } - - return rc; + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode); } /** @@ -940,17 +959,14 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode) { - unsigned long gpa; - unsigned long currlen; + union asce asce; int rc = 0; + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); + if (rc) + return rc; ipte_lock(vcpu); - while (length > 0 && !rc) { - currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); - gva += currlen; - length -= currlen; - } + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode); ipte_unlock(vcpu); return rc; From 6e1659b674b6144892fc2880e38134674a7b68ed Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 26 Nov 2021 17:45:49 +0100 Subject: [PATCH 23/32] KVM: s390: gaccess: Cleanup access to guest pages [ Upstream commit bad13799e0305deb258372b7298a86be4c78aaba ] Introduce a helper function for guest frame access. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Claudio Imbrenda Message-Id: <20211126164549.7046-4-scgl@linux.ibm.com> Signed-off-by: Janosch Frank Stable-dep-of: e8061f06185b ("KVM: s390: gaccess: Check if guest address is in memslot") Signed-off-by: Sasha Levin --- arch/s390/kvm/gaccess.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9b9bfc333e62..164f96ba61dd 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -866,6 +866,20 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, return 0; } +static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len) +{ + const unsigned int offset = offset_in_page(gpa); + const gfn_t gfn = gpa_to_gfn(gpa); + int rc; + + if (mode == GACC_STORE) + rc = kvm_write_guest_page(kvm, gfn, data, offset, len); + else + rc = kvm_read_guest_page(kvm, gfn, data, offset, len); + return rc; +} + int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode) { @@ -896,10 +910,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); - if (mode == GACC_STORE) - rc = kvm_write_guest(vcpu->kvm, gpas[idx], data, fragment_len); - else - rc = kvm_read_guest(vcpu->kvm, gpas[idx], data, fragment_len); + rc = access_guest_page(vcpu->kvm, mode, gpas[idx], data, fragment_len); len -= fragment_len; data += fragment_len; } @@ -920,10 +931,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); - if (mode) - rc = write_guest_abs(vcpu, gpa, data, fragment_len); - else - rc = read_guest_abs(vcpu, gpa, data, fragment_len); + rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len); len -= fragment_len; gra += fragment_len; data += fragment_len; From bf83ba3c55d4f2b94882e8ecaae54f4807068dc3 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 17 Sep 2024 17:18:33 +0200 Subject: [PATCH 24/32] KVM: s390: gaccess: Check if guest address is in memslot [ Upstream commit e8061f06185be0a06a73760d6526b8b0feadfe52 ] Previously, access_guest_page() did not check whether the given guest address is inside of a memslot. This is not a problem, since kvm_write_guest_page/kvm_read_guest_page return -EFAULT in this case. However, -EFAULT is also returned when copy_to/from_user fails. When emulating a guest instruction, the address being outside a memslot usually means that an addressing exception should be injected into the guest. Failure in copy_to/from_user however indicates that something is wrong in userspace and hence should be handled there. To be able to distinguish these two cases, return PGM_ADDRESSING in access_guest_page() when the guest address is outside guest memory. In access_guest_real(), populate vcpu->arch.pgm.code such that kvm_s390_inject_prog_cond() can be used in the caller for injecting into the guest (if applicable). Since this adds a new return value to access_guest_page(), we need to make sure that other callers are not confused by the new positive return value. There are the following users of access_guest_page(): - access_guest_with_key() does the checking itself (in guest_range_to_gpas()), so this case should never happen. Even if, the handling is set up properly. - access_guest_real() just passes the return code to its callers, which are: - read_guest_real() - see below - write_guest_real() - see below There are the following users of read_guest_real(): - ar_translation() in gaccess.c which already returns PGM_* - setup_apcb10(), setup_apcb00(), setup_apcb11() in vsie.c which always return -EFAULT on read_guest_read() nonzero return - no change - shadow_crycb(), handle_stfle() always present this as validity, this could be handled better but doesn't change current behaviour - no change There are the following users of write_guest_real(): - kvm_s390_store_status_unloaded() always returns -EFAULT on write_guest_real() failure. Fixes: 2293897805c2 ("KVM: s390: add architecture compliant guest access functions") Cc: stable@vger.kernel.org Signed-off-by: Nico Boehr Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20240917151904.74314-2-nrb@linux.ibm.com Acked-by: Janosch Frank Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin --- arch/s390/kvm/gaccess.c | 4 ++++ arch/s390/kvm/gaccess.h | 14 ++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 164f96ba61dd..5a880e254524 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -873,6 +873,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -936,6 +938,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 7c72a5e3449f..8ed2d6c7404f 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -344,11 +344,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data, * @len: number of bytes to copy * * Copy @len bytes from @data (kernel space) to @gra (guest real address). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest low address and key protection are not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying from @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to guest memory. */ @@ -367,11 +368,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, * @len: number of bytes to copy * * Copy @len bytes from @gra (guest real address) to @data (kernel space). - * It is up to the caller to ensure that the entire guest memory range is - * valid memory before calling this function. * Guest key protection is not checked. * - * Returns zero on success or -EFAULT on error. + * Returns zero on success, -EFAULT when copying to @data failed, or + * PGM_ADRESSING in case @gra is outside a memslot. In this case, pgm check info + * is also stored to allow injecting into the guest (if applicable) using + * kvm_s390_inject_prog_cond(). * * If an error occurs data may have been copied partially to kernel space. */ From 20e27c7739452f2785ca42e2cdf39bbcae7139ae Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 23 Oct 2024 11:43:14 +0800 Subject: [PATCH 25/32] block, bfq: fix procress reference leakage for bfqq in merge chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 73aeab373557fa6ee4ae0b742c6211ccd9859280 ] Original state: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \--------------\ \-------------\ \-------------\| V V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 1 2 4 After commit 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()"), if P1 issues a new IO: Without the patch: Process 1 Process 2 Process 3 Process 4 (BIC1) (BIC2) (BIC3) (BIC4) Λ | | | \------------------------------\ \-------------\| V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 0 2 4 bfqq3 will be used to handle IO from P1, this is not expected, IO should be redirected to bfqq4; With the patch: ------------------------------------------- | | Process 1 Process 2 Process 3 | Process 4 (BIC1) (BIC2) (BIC3) | (BIC4) | | | | \-------------\ \-------------\| V V bfqq1--------->bfqq2---------->bfqq3----------->bfqq4 ref 0 0 2 4 IO is redirected to bfqq4, however, procress reference of bfqq3 is still 2, while there is only P2 using it. Fix the problem by calling bfq_merge_bfqqs() for each bfqq in the merge chain. Also change bfqq_merge_bfqqs() to return new_bfqq to simplify code. Fixes: 0e456dba86c7 ("block, bfq: choose the last bfqq from merge chain in bfq_setup_cooperator()") Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20240909134154.954924-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/bfq-iosched.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 515e3c1a5475..c1600e3ac333 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2774,10 +2774,12 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void -bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, - struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) +static struct bfq_queue *bfq_merge_bfqqs(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_queue *bfqq) { + struct bfq_queue *new_bfqq = bfqq->new_bfqq; + bfq_log_bfqq(bfqd, bfqq, "merging with queue %lu", (unsigned long)new_bfqq->pid); /* Save weight raising and idle window of the merged queues */ @@ -2845,6 +2847,8 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, new_bfqq->pid = -1; bfqq->bic = NULL; bfq_release_process_ref(bfqd, bfqq); + + return new_bfqq; } static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, @@ -2880,14 +2884,8 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * fulfilled, i.e., bic can be redirected to new_bfqq * and bfqq can be put. */ - bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq, - new_bfqq); - /* - * If we get here, bio will be queued into new_queue, - * so use new_bfqq to decide whether bio and rq can be - * merged. - */ - bfqq = new_bfqq; + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, bfqd->bio_bic, bfqq); /* * Change also bqfd->bio_bfqq, as @@ -5444,6 +5442,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) bool waiting, idle_timer_disabled = false; if (new_bfqq) { + struct bfq_queue *old_bfqq = bfqq; /* * Release the request's reference to the old bfqq * and make sure one is taken to the shared queue. @@ -5459,18 +5458,18 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) * then complete the merge and redirect it to * new_bfqq. */ - if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) - bfq_merge_bfqqs(bfqd, RQ_BIC(rq), - bfqq, new_bfqq); + if (bic_to_bfqq(RQ_BIC(rq), 1) == bfqq) { + while (bfqq != new_bfqq) + bfqq = bfq_merge_bfqqs(bfqd, RQ_BIC(rq), bfqq); + } - bfq_clear_bfqq_just_created(bfqq); + bfq_clear_bfqq_just_created(old_bfqq); /* * rq is about to be enqueued into new_bfqq, * release rq reference on bfqq */ - bfq_put_queue(bfqq); + bfq_put_queue(old_bfqq); rq->elv.priv[1] = new_bfqq; - bfqq = new_bfqq; } bfq_update_io_thinktime(bfqd, bfqq); From c9b77438077d5a20c79ead95bcdaf9bd4797baaf Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 22 Oct 2024 15:45:51 -0300 Subject: [PATCH 26/32] exec: don't WARN for racy path_noexec check [ Upstream commit 0d196e7589cefe207d5d41f37a0a28a1fdeeb7c6 ] Both i_mode and noexec checks wrapped in WARN_ON stem from an artifact of the previous implementation. They used to legitimately check for the condition, but that got moved up in two commits: 633fb6ac3980 ("exec: move S_ISREG() check earlier") 0fd338b2d2cd ("exec: move path_noexec() check earlier") Instead of being removed said checks are WARN_ON'ed instead, which has some debug value. However, the spurious path_noexec check is racy, resulting in unwarranted warnings should someone race with setting the noexec flag. One can note there is more to perm-checking whether execve is allowed and none of the conditions are guaranteed to still hold after they were tested for. Additionally this does not validate whether the code path did any perm checking to begin with -- it will pass if the inode happens to be regular. Keep the redundant path_noexec() check even though it's mindless nonsense checking for guarantee that isn't given so drop the WARN. Reword the commentary and do small tidy ups while here. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240805131721.765484-1-mjguzik@gmail.com [brauner: keep redundant path_noexec() check] Signed-off-by: Christian Brauner [cascardo: keep exit label and use it] Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Sasha Levin --- fs/exec.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 6e5324c7e9b6..7144c541818f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -144,13 +144,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * Check do_open_execat() for an explanation. */ error = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) goto exit; fsnotify_open(file); @@ -919,16 +917,16 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) - goto out; + return file; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * In the past the regular type check was here. It moved to may_open() in + * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is + * an invariant that all non-regular files error out before we get here. */ err = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) goto exit; err = deny_write_access(file); @@ -938,7 +936,6 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (name->name[0] != '\0') fsnotify_open(file); -out: return file; exit: From 649d64650623adc449c5f35b7b1156c1e497569d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Oct 2024 18:33:46 +0200 Subject: [PATCH 27/32] iomap: update ki_pos a little later in iomap_dio_complete upstream 936e114a245b6e38e0dbf706a67e7611fc993da1 commit. Move the ki_pos update down a bit to prepare for a better common helper that invalidates pages based of an iocb. Link: https://lkml.kernel.org/r/20230601145904.1385409-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Darrick J. Wong Cc: Al Viro Cc: Andreas Gruenbacher Cc: Anna Schumaker Cc: Chao Yu Cc: Christian Brauner Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Johannes Thumshirn Cc: Matthew Wilcox Cc: Miklos Szeredi Cc: Miklos Szeredi Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Xiubo Li Signed-off-by: Andrew Morton Signed-off-by: Mahmoud Adam Signed-off-by: Sasha Levin --- fs/iomap/direct-io.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 933f234d5bec..8a49c0d3a7b4 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -93,7 +93,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) if (offset + ret > dio->i_size && !(dio->flags & IOMAP_DIO_WRITE)) ret = dio->i_size - offset; - iocb->ki_pos += ret; } /* @@ -119,15 +118,18 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio) } inode_dio_end(file_inode(iocb->ki_filp)); - /* - * If this is a DSYNC write, make sure we push it to stable storage now - * that we've written data. - */ - if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) - ret = generic_write_sync(iocb, ret); + if (ret > 0) { + iocb->ki_pos += ret; + + /* + * If this is a DSYNC write, make sure we push it to stable + * storage now that we've written data. + */ + if (dio->flags & IOMAP_DIO_NEED_SYNC) + ret = generic_write_sync(iocb, ret); + } kfree(dio); - return ret; } EXPORT_SYMBOL_GPL(iomap_dio_complete); From 75f828e944dacaac8870418461d3d48a1ecf2331 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 27 Aug 2024 12:45:23 +0200 Subject: [PATCH 28/32] drm/vboxvideo: Replace fake VLA at end of vbva_mouse_pointer_shape with real VLA [ Upstream commit d92b90f9a54d9300a6e883258e79f36dab53bfae ] Replace the fake VLA at end of the vbva_mouse_pointer_shape shape with a real VLA to fix a "memcpy: detected field-spanning write error" warning: [ 13.319813] memcpy: detected field-spanning write (size 16896) of single field "p->data" at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 (size 4) [ 13.319841] WARNING: CPU: 0 PID: 1105 at drivers/gpu/drm/vboxvideo/hgsmi_base.c:154 hgsmi_update_pointer_shape+0x192/0x1c0 [vboxvideo] [ 13.320038] Call Trace: [ 13.320173] hgsmi_update_pointer_shape [vboxvideo] [ 13.320184] vbox_cursor_atomic_update [vboxvideo] Note as mentioned in the added comment it seems the original length calculation for the allocated and send hgsmi buffer is 4 bytes too large. Changing this is not the goal of this patch, so this behavior is kept. Signed-off-by: Hans de Goede Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240827104523.17442-1-hdegoede@redhat.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vboxvideo/hgsmi_base.c | 10 +++++++++- drivers/gpu/drm/vboxvideo/vboxvideo.h | 4 +--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vboxvideo/hgsmi_base.c b/drivers/gpu/drm/vboxvideo/hgsmi_base.c index 361d3193258e..7edc9cf6a606 100644 --- a/drivers/gpu/drm/vboxvideo/hgsmi_base.c +++ b/drivers/gpu/drm/vboxvideo/hgsmi_base.c @@ -135,7 +135,15 @@ int hgsmi_update_pointer_shape(struct gen_pool *ctx, u32 flags, flags |= VBOX_MOUSE_POINTER_VISIBLE; } - p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len, HGSMI_CH_VBVA, + /* + * The 4 extra bytes come from switching struct vbva_mouse_pointer_shape + * from having a 4 bytes fixed array at the end to using a proper VLA + * at the end. These 4 extra bytes were not subtracted from sizeof(*p) + * before the switch to the VLA, so this way the behavior is unchanged. + * Chances are these 4 extra bytes are not necessary but they are kept + * to avoid regressions. + */ + p = hgsmi_buffer_alloc(ctx, sizeof(*p) + pixel_len + 4, HGSMI_CH_VBVA, VBVA_MOUSE_POINTER_SHAPE); if (!p) return -ENOMEM; diff --git a/drivers/gpu/drm/vboxvideo/vboxvideo.h b/drivers/gpu/drm/vboxvideo/vboxvideo.h index a5de40fe1a76..bed285fe083c 100644 --- a/drivers/gpu/drm/vboxvideo/vboxvideo.h +++ b/drivers/gpu/drm/vboxvideo/vboxvideo.h @@ -351,10 +351,8 @@ struct vbva_mouse_pointer_shape { * Bytes in the gap between the AND and the XOR mask are undefined. * XOR mask scanlines have no gap between them and size of XOR mask is: * xor_len = width * 4 * height. - * - * Preallocate 4 bytes for accessing actual data as p->data. */ - u8 data[4]; + u8 data[]; } __packed; /* pointer is visible */ From 0329056e0730dee3f3ece2b329ecf19987aeae99 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 30 Sep 2024 14:08:28 +0800 Subject: [PATCH 29/32] ASoC: fsl_sai: Enable 'FIFO continue on error' FCONT bit [ Upstream commit 72455e33173c1a00c0ce93d2b0198eb45d5f4195 ] FCONT=1 means On FIFO error, the SAI will continue from the same word that caused the FIFO error to set after the FIFO warning flag has been cleared. Set FCONT bit in control register to avoid the channel swap issue after SAI xrun. Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/1727676508-22830-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/fsl/fsl_sai.c | 5 ++++- sound/soc/fsl/fsl_sai.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 03731d14d475..998102711da0 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -490,6 +490,9 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_FRSZ(slots); + /* Set to avoid channel swap */ + val_cr4 |= FSL_SAI_CR4_FCONT; + /* Set to output mode to avoid tri-stated data pins */ if (tx) val_cr4 |= FSL_SAI_CR4_CHMOD; @@ -515,7 +518,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, FSL_SAI_CR3_TRCE((1 << pins) - 1)); regmap_update_bits(sai->regmap, FSL_SAI_xCR4(tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | - FSL_SAI_CR4_CHMOD_MASK, + FSL_SAI_CR4_CHMOD_MASK | FSL_SAI_CR4_FCONT_MASK, val_cr4); regmap_update_bits(sai->regmap, FSL_SAI_xCR5(tx, ofs), FSL_SAI_CR5_WNW_MASK | FSL_SAI_CR5_W0W_MASK | diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h index 691847d54b17..eff3b7b2dd3e 100644 --- a/sound/soc/fsl/fsl_sai.h +++ b/sound/soc/fsl/fsl_sai.h @@ -132,6 +132,7 @@ /* SAI Transmit and Receive Configuration 4 Register */ +#define FSL_SAI_CR4_FCONT_MASK BIT(28) #define FSL_SAI_CR4_FCONT BIT(28) #define FSL_SAI_CR4_FCOMB_SHIFT BIT(26) #define FSL_SAI_CR4_FCOMB_SOFT BIT(27) From 8605ca4bd0f469afab213e2f8df841fe62b93df6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 27 Sep 2024 11:18:38 +0100 Subject: [PATCH 30/32] arm64: Force position-independent veneers [ Upstream commit 9abe390e689f4f5c23c5f507754f8678431b4f72 ] Certain portions of code always need to be position-independent regardless of CONFIG_RELOCATABLE, including code which is executed in an idmap or which is executed before relocations are applied. In some kernel configurations the LLD linker generates position-dependent veneers for such code, and when executed these result in early boot-time failures. Marc Zyngier encountered a boot failure resulting from this when building a (particularly cursed) configuration with LLVM, as he reported to the list: https://lore.kernel.org/linux-arm-kernel/86wmjwvatn.wl-maz@kernel.org/ In Marc's kernel configuration, the .head.text and .rodata.text sections end up more than 128MiB apart, requiring a veneer to branch between the two: | [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w _text | ffff800080000000 g .head.text 0000000000000000 _text | [mark@lakrids:~/src/linux]% usekorg 14.1.0 aarch64-linux-objdump -t vmlinux | grep -w primary_entry | ffff8000889df0e0 g .rodata.text 000000000000006c primary_entry, ... consequently, LLD inserts a position-dependent veneer for the branch from _stext (in .head.text) to primary_entry (in .rodata.text): | ffff800080000000 <_text>: | ffff800080000000: fa405a4d ccmp x18, #0x0, #0xd, pl // pl = nfrst | ffff800080000004: 14003fff b ffff800080010000 <__AArch64AbsLongThunk_primary_entry> ... | ffff800080010000 <__AArch64AbsLongThunk_primary_entry>: | ffff800080010000: 58000050 ldr x16, ffff800080010008 <__AArch64AbsLongThunk_primary_entry+0x8> | ffff800080010004: d61f0200 br x16 | ffff800080010008: 889df0e0 .word 0x889df0e0 | ffff80008001000c: ffff8000 .word 0xffff8000 ... and as this is executed early in boot before the kernel is mapped in TTBR1 this results in a silent boot failure. Fix this by passing '--pic-veneer' to the linker, which will cause the linker to use position-independent veneers, e.g. | ffff800080000000 <_text>: | ffff800080000000: fa405a4d ccmp x18, #0x0, #0xd, pl // pl = nfrst | ffff800080000004: 14003fff b ffff800080010000 <__AArch64ADRPThunk_primary_entry> ... | ffff800080010000 <__AArch64ADRPThunk_primary_entry>: | ffff800080010000: f004e3f0 adrp x16, ffff800089c8f000 <__idmap_text_start> | ffff800080010004: 91038210 add x16, x16, #0xe0 | ffff800080010008: d61f0200 br x16 I've opted to pass '--pic-veneer' unconditionally, as: * In addition to solving the boot failure, these sequences are generally nicer as they require fewer instructions and don't need to perform data accesses. * While the position-independent veneer sequences have a limited +/-2GiB range, this is not a new restriction. Even kernels built with CONFIG_RELOCATABLE=n are limited to 2GiB in size as we have several structues using 32-bit relative offsets and PPREL32 relocations, which are similarly limited to +/-2GiB in range. These include extable entries, jump table entries, and alt_instr entries. * GNU LD defaults to using position-independent veneers, and supports the same '--pic-veneer' option, so this change is not expected to adversely affect GNU LD. I've tested with GNU LD 2.30 to 2.42 inclusive and LLVM 13.0.1 to 19.1.0 inclusive, using the kernel.org binaries from: * https://mirrors.edge.kernel.org/pub/tools/crosstool/ * https://mirrors.edge.kernel.org/pub/tools/llvm/ Signed-off-by: Mark Rutland Reported-by: Marc Zyngier Cc: Ard Biesheuvel Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Will Deacon Acked-by: Ard Biesheuvel Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20240927101838.3061054-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 485b7dbd4f9e..96dcddc358c7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X --pic-veneer ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour From df848523d6b47579a548561ac7f0dad9006ac69c Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 22 Oct 2024 09:40:37 -0500 Subject: [PATCH 31/32] jfs: Fix sanity check in dbMount [ Upstream commit 67373ca8404fe57eb1bb4b57f314cff77ce54932 ] MAXAG is a legitimate value for bmp->db_numag Fixes: e63866a47556 ("jfs: fix out-of-bounds in dbNextAG() and diAlloc()") Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin --- fs/jfs/jfs_dmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 8efd93992946..559f6ebebfc0 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -187,7 +187,7 @@ int dbMount(struct inode *ipbmap) } bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); - if (!bmp->db_numag || bmp->db_numag >= MAXAG) { + if (!bmp->db_numag || bmp->db_numag > MAXAG) { err = -EINVAL; goto err_release_metapage; } From 02874ca52df2ca2423ba6122039315ed61c25972 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 7 Oct 2024 15:47:24 +0100 Subject: [PATCH 32/32] tracing: Consider the NULL character when validating the event length [ Upstream commit 0b6e2e22cb23105fcb171ab92f0f7516c69c8471 ] strlen() returns a string length excluding the null byte. If the string length equals to the maximum buffer length, the buffer will have no space for the NULL terminating character. This commit checks this condition and returns failure for it. Link: https://lore.kernel.org/all/20241007144724.920954-1-leo.yan@arm.com/ Fixes: dec65d79fd26 ("tracing/probe: Check event name length correctly") Signed-off-by: Leo Yan Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 073abbe3866b..1893fe5460ac 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -256,7 +256,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, if (len == 0) { trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; - } else if (len > MAX_EVENT_NAME_LEN) { + } else if (len >= MAX_EVENT_NAME_LEN) { trace_probe_log_err(offset, EVENT_TOO_LONG); return -EINVAL; }