From 3fcd53b1d859799686a08785afb8990566c31cfa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 8 Apr 2020 00:01:42 +0200 Subject: [PATCH 01/42] net: phy: realtek: fix handling of RTL8105e-integrated PHY [ No applicable upstream commit ] After the referenced fix it turned out that one particular RTL8168 chip version (RTL8105e) does not work on 5.4 because no dedicated PHY driver exists. Adding this PHY driver was done for fixing a different issue for versions from 5.5 already. I re-send the same change for 5.4 because the commit message differs. Fixes: 2e8c339b4946 ("r8169: fix PHY driver check on platforms w/o module softdeps") Signed-off-by: Heiner Kallweit Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index c76df51dd3c5..879ca37c8508 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -456,6 +456,15 @@ static struct phy_driver realtek_drvs[] = { .resume = genphy_resume, .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, + }, { + PHY_ID_MATCH_MODEL(0x001cc880), + .name = "RTL8208 Fast Ethernet", + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, { PHY_ID_MATCH_EXACT(0x001cc910), .name = "RTL8211 Gigabit Ethernet", From 77cf807936923f6b241b268219febede5cdae7bc Mon Sep 17 00:00:00 2001 From: Herat Ramani Date: Wed, 1 Apr 2020 01:16:09 +0530 Subject: [PATCH 02/42] cxgb4: fix MPS index overwrite when setting MAC address [ Upstream commit 41aa8561ca3fc5748391f08cc5f3e561923da52c ] cxgb4_update_mac_filt() earlier requests firmware to add a new MAC address into MPS TCAM. The MPS TCAM index returned by firmware is stored in pi->xact_addr_filt. However, the saved MPS TCAM index gets overwritten again with the return value of cxgb4_update_mac_filt(), which is wrong. When trying to update to another MAC address later, the wrong MPS TCAM index is sent to firmware, which causes firmware to return error, because it's not the same MPS TCAM index that firmware had sent earlier to driver. So, fix by removing the wrong overwrite being done after call to cxgb4_update_mac_filt(). Fixes: 3f8cfd0d95e6 ("cxgb4/cxgb4vf: Program hash region for {t4/t4vf}_change_mac()") Signed-off-by: Herat Ramani Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 38024877751c..069a51847885 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3032,7 +3032,6 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) return ret; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - pi->xact_addr_filt = ret; return 0; } From bbbdd7956bab09a80d9453e76c9234572c2f6432 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 30 Mar 2020 11:22:19 -0400 Subject: [PATCH 03/42] ipv6: don't auto-add link-local address to lag ports [ Upstream commit 744fdc8233f6aa9582ce08a51ca06e59796a3196 ] Bonding slave and team port devices should not have link-local addresses automatically added to them, as it can interfere with openvswitch being able to properly add tc ingress. 
Basic reproducer, courtesy of Marcelo: $ ip link add name bond0 type bond $ ip link set dev ens2f0np0 master bond0 $ ip link set dev ens2f1np2 master bond0 $ ip link set dev bond0 up $ ip a s 1: lo: mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000 link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 inet 127.0.0.1/8 scope host lo valid_lft forever preferred_lft forever inet6 ::1/128 scope host valid_lft forever preferred_lft forever 2: ens2f0np0: mtu 1500 qdisc mq master bond0 state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff 5: ens2f1np2: mtu 1500 qdisc mq master bond0 state DOWN group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff 11: bond0: mtu 1500 qdisc noqueue state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link valid_lft forever preferred_lft forever (above trimmed to relevant entries, obviously) $ sysctl net.ipv6.conf.ens2f0np0.addr_gen_mode=0 net.ipv6.conf.ens2f0np0.addr_gen_mode = 0 $ sysctl net.ipv6.conf.ens2f1np2.addr_gen_mode=0 net.ipv6.conf.ens2f1np2.addr_gen_mode = 0 $ ip a l ens2f0np0 2: ens2f0np0: mtu 1500 qdisc mq master bond0 state UP group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link tentative valid_lft forever preferred_lft forever $ ip a l ens2f1np2 5: ens2f1np2: mtu 1500 qdisc mq master bond0 state DOWN group default qlen 1000 link/ether 00:0f:53:2f:ea:40 brd ff:ff:ff:ff:ff:ff inet6 fe80::20f:53ff:fe2f:ea40/64 scope link tentative valid_lft forever preferred_lft forever Looks like addrconf_sysctl_addr_gen_mode() bypasses the original "is this a slave interface?" check added by commit c2edacf80e15, and results in an address getting added, while w/the proposed patch added, no address gets added. This simply adds the same gating check to another code path, and thus should prevent the same devices from erroneously obtaining an ipv6 link-local address. Fixes: d35a00b8e33d ("net/ipv6: allow sysctl to change link-local address generation mode") Reported-by: Moshe Levi CC: Stephen Hemminger CC: Marcelo Ricardo Leitner CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d02ccd749a60..6e1d200f30c1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3296,6 +3296,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (netif_is_l3_master(idev->dev)) return; + /* no link local addresses on devices flagged as slaves */ + if (idev->dev->flags & IFF_SLAVE) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); switch (idev->cnf.addr_gen_mode) { From 040f7a27583f4d543f1c29141c3a2f16b3b1afe6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 4 Apr 2020 14:35:17 -0700 Subject: [PATCH 04/42] net: dsa: bcm_sf2: Do not register slave MDIO bus with OF [ Upstream commit 536fab5bf5826404534a6c271f622ad2930d9119 ] We were registering our slave MDIO bus with OF and doing so with assigning the newly created slave_mii_bus of_node to the master MDIO bus controller node. This is a bad thing to do for a number of reasons: - we are completely lying about the slave MII bus is arranged and yet we still want to control which MDIO devices it probes. 
It was attempted before to play tricks with the bus_mask to perform that: https://www.spinics.net/lists/netdev/msg429420.html but the approach was rightfully rejected - the device_node reference counting is messed up and we are effectively doing a double probe on the devices we already probed using the master, this messes up all resources reference counts (such as clocks) The proper fix for this as indicated by David in his reply to the thread above is to use a platform data style registration so as to control exactly which devices we probe: https://www.spinics.net/lists/netdev/msg430083.html By using mdiobus_register(), our slave_mii_bus->phy_mask value is used as intended, and all the PHY addresses that must be redirected towards our slave MDIO bus is happening while other addresses get redirected towards the master MDIO bus. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 46dc913da852..4878400584c2 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -459,7 +459,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) priv->slave_mii_bus->parent = ds->dev->parent; priv->slave_mii_bus->phy_mask = ~priv->indir_phy_mask; - err = of_mdiobus_register(priv->slave_mii_bus, dn); + err = mdiobus_register(priv->slave_mii_bus); if (err && dn) of_node_put(dn); From bce7ce18bd18d022cba978bb4265b2eca94761d4 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Apr 2020 13:00:30 -0700 Subject: [PATCH 05/42] net: dsa: bcm_sf2: Ensure correct sub-node is parsed [ Upstream commit afa3b592953bfaecfb4f2f335ec5f935cff56804 ] When the bcm_sf2 was converted into a proper platform device driver and used the new dsa_register_switch() interface, we would still be parsing the legacy DSA node that contained all the port information since the platform firmware has intentionally maintained backward and forward compatibility to client programs. Ensure that we do parse the correct node, which is "ports" per the revised DSA binding. Fixes: d9338023fb8e ("net: dsa: bcm_sf2: Make it a real platform device driver") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/bcm_sf2.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 4878400584c2..9502db66092e 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1053,6 +1053,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) const struct bcm_sf2_of_data *data; struct b53_platform_data *pdata; struct dsa_switch_ops *ops; + struct device_node *ports; struct bcm_sf2_priv *priv; struct b53_device *dev; struct dsa_switch *ds; @@ -1115,7 +1116,11 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) set_bit(0, priv->cfp.used); set_bit(0, priv->cfp.unique); - bcm_sf2_identify_ports(priv, dn->child); + ports = of_find_node_by_name(dn, "ports"); + if (ports) { + bcm_sf2_identify_ports(priv, ports); + of_node_put(ports); + } priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); From 7d3d99f579e81d6ee0b133e5a4ba1f3522553f65 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Fri, 3 Apr 2020 19:28:24 +0800 Subject: [PATCH 06/42] net: dsa: mt7530: fix null pointer dereferencing in port5 setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0452800f6db4ed0a42ffb15867c0acfd68829f6a ] The 2nd gmac of mediatek soc ethernet may not be connected to a PHY and a phy-handle isn't always available. Unfortunately, mt7530 dsa driver assumes that the 2nd gmac is always connected to switch port 5 and setup mt7530 according to phy address of 2nd gmac node, causing null pointer dereferencing when phy-handle isn't defined in dts. This commit fix this setup code by checking return value of of_parse_phandle before using it. Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5") Signed-off-by: Chuanhong Guo Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Tested-by: René van Dorst Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index e0e932f0aed1..8071c3fa3fb7 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1353,6 +1353,9 @@ mt7530_setup(struct dsa_switch *ds) continue; phy_node = of_parse_phandle(mac_np, "phy-handle", 0); + if (!phy_node) + continue; + if (phy_node->parent == priv->dev->of_node->parent) { interface = of_get_phy_mode(mac_np); id = of_mdio_parse_addr(ds->dev, phy_node); From 1189ba9eedacb6ddc1171bd2602b338969d2b66c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 3 Apr 2020 09:53:25 +0200 Subject: [PATCH 07/42] net: phy: micrel: kszphy_resume(): add delay after genphy_resume() before accessing PHY registers [ Upstream commit 6110dff776f7fa65c35850ef65b41d3b39e2fac2 ] After the power-down bit is cleared, the chip internally triggers a global reset. According to the KSZ9031 documentation, we have to wait at least 1ms for the reset to finish. If the chip is accessed during reset, read will return 0xffff, while write will be ignored. Depending on the system performance and MDIO bus speed, we may or may not run in to this issue. This bug was discovered on an iMX6QP system with KSZ9031 PHY and attached PHY interrupt line. If IRQ was used, the link status update was lost. In polling mode, the link status update was always correct. 
The investigation showed, that during a read-modify-write access, the read returned 0xffff (while the chip was still in reset) and corresponding write hit the chip _after_ reset and triggered (due to the 0xffff) another reset in an undocumented bit (register 0x1f, bit 1), resulting in the next write being lost due to the new reset cycle. This patch fixes the issue by adding a 1...2 ms sleep after the genphy_resume(). Fixes: 836384d2501d ("net: phy: micrel: Add specific suspend") Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/micrel.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 63dedec0433d..51b64f087717 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -25,6 +25,7 @@ #include #include #include +#include /* Operation Mode Strap Override */ #define MII_KSZPHY_OMSO 0x16 @@ -902,6 +903,12 @@ static int kszphy_resume(struct phy_device *phydev) genphy_resume(phydev); + /* After switching from power-down to normal mode, an internal global + * reset is automatically generated. Wait a minimum of 1 ms before + * read/write access to the PHY registers. + */ + usleep_range(1000, 2000); + ret = kszphy_config_reset(phydev); if (ret) return ret; From 1891d57f89aac8deca2ecabaeabc283c0a026534 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 28 Mar 2020 12:12:59 -0700 Subject: [PATCH 08/42] net_sched: add a temporary refcnt for struct tcindex_data [ Upstream commit 304e024216a802a7dc8ba75d36de82fa136bbf3e ] Although we intentionally use an ordered workqueue for all tc filter works, the ordering is not guaranteed by RCU work, given that tcf_queue_work() is esstenially a call_rcu(). This problem is demostrated by Thomas: CPU 0: tcf_queue_work() tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work); -> Migration to CPU 1 CPU 1: tcf_queue_work(&p->rwork, tcindex_destroy_work); so the 2nd work could be queued before the 1st one, which leads to a free-after-free. Enforcing this order in RCU work is hard as it requires to change RCU code too. Fortunately we can workaround this problem in tcindex filter by taking a temporary refcnt, we only refcnt it right before we begin to destroy it. This simplifies the code a lot as a full refcnt requires much more changes in tcindex_set_parms(). Reported-by: syzbot+46f513c3033d592409d2@syzkaller.appspotmail.com Fixes: 3d210534cc93 ("net_sched: fix a race condition in tcindex_destroy()") Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Paul E. McKenney Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 44 +++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 9904299424a1..065345832a69 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -26,9 +27,12 @@ #define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ +struct tcindex_data; + struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; + struct tcindex_data *p; struct rcu_work rwork; }; @@ -49,6 +53,7 @@ struct tcindex_data { u32 hash; /* hash table size; 0 if undefined */ u32 alloc_hash; /* allocated size */ u32 fall_through; /* 0: only classify if explicit match */ + refcount_t refcnt; /* a temporary refcnt for perfect hash */ struct rcu_work rwork; }; @@ -57,6 +62,20 @@ static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) return tcf_exts_has_actions(&r->exts) || r->res.classid; } +static void tcindex_data_get(struct tcindex_data *p) +{ + refcount_inc(&p->refcnt); +} + +static void tcindex_data_put(struct tcindex_data *p) +{ + if (refcount_dec_and_test(&p->refcnt)) { + kfree(p->perfect); + kfree(p->h); + kfree(p); + } +} + static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, u16 key) { @@ -141,6 +160,7 @@ static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) { tcf_exts_destroy(&r->exts); tcf_exts_put_net(&r->exts); + tcindex_data_put(r->p); } static void tcindex_destroy_rexts_work(struct work_struct *work) @@ -212,6 +232,8 @@ static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, else __tcindex_destroy_fexts(f); } else { + tcindex_data_get(p); + if (tcf_exts_get_net(&r->exts)) tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work); else @@ -228,9 +250,7 @@ static void tcindex_destroy_work(struct work_struct *work) struct tcindex_data, rwork); - kfree(p->perfect); - kfree(p->h); - kfree(p); + tcindex_data_put(p); } static inline int @@ -248,9 +268,11 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { }; static int tcindex_filter_result_init(struct tcindex_filter_result *r, + struct tcindex_data *p, struct net *net) { memset(r, 0, sizeof(*r)); + r->p = p; return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } @@ -290,6 +312,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) goto errout; + cp->perfect[i].p = cp; } return 0; @@ -334,6 +357,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->alloc_hash = p->alloc_hash; cp->fall_through = p->fall_through; cp->tp = tp; + refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */ if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -366,7 +390,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } cp->h = p->h; - err = tcindex_filter_result_init(&new_filter_result, net); + err = tcindex_filter_result_init(&new_filter_result, cp, net); if (err < 0) goto errout_alloc; if (old_r) @@ -434,7 +458,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, goto errout_alloc; f->key = handle; f->next = NULL; - err = tcindex_filter_result_init(&f->result, net); + err = tcindex_filter_result_init(&f->result, cp, net); if (err < 0) { kfree(f); goto errout_alloc; @@ -447,7 +471,7 @@ 
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (old_r && old_r != r) { - err = tcindex_filter_result_init(old_r, net); + err = tcindex_filter_result_init(old_r, cp, net); if (err < 0) { kfree(f); goto errout_alloc; @@ -571,6 +595,14 @@ static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, for (i = 0; i < p->hash; i++) { struct tcindex_filter_result *r = p->perfect + i; + /* tcf_queue_work() does not guarantee the ordering we + * want, so we have to take this refcnt temporarily to + * ensure 'p' is freed after all tcindex_filter_result + * here. Imperfect hash does not need this, because it + * uses linked lists rather than an array. + */ + tcindex_data_get(p); + tcf_unbind_filter(tp, &r->res); if (tcf_exts_get_net(&r->exts)) tcf_queue_work(&r->rwork, From 049b9fa3ef65095250243e3e196b2f1bb16cef89 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 2 Apr 2020 20:58:51 -0700 Subject: [PATCH 09/42] net_sched: fix a missing refcnt in tcindex_init() [ Upstream commit a8eab6d35e22f4f21471f16147be79529cd6aaf7 ] The initial refcnt of struct tcindex_data should be 1, it is clear that I forgot to set it to 1 in tcindex_init(). This leads to a dec-after-zero warning. Reported-by: syzbot+8325e509a1bf83ec741d@syzkaller.appspotmail.com Fixes: 304e024216a8 ("net_sched: add a temporary refcnt for struct tcindex_data") Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: Paul E. McKenney Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/cls_tcindex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 065345832a69..61e95029c18f 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -151,6 +151,7 @@ static int tcindex_init(struct tcf_proto *tp) p->mask = 0xffff; p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; + refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */ rcu_assign_pointer(tp->root, p); return 0; From feed32e3d6feb7a9e521a8e445b3beb953f776cf Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 3 Apr 2020 10:23:29 +0800 Subject: [PATCH 10/42] net: stmmac: dwmac1000: fix out-of-bounds mac address reg setting [ Upstream commit 3e1221acf6a8f8595b5ce354bab4327a69d54d18 ] Commit 9463c4455900 ("net: stmmac: dwmac1000: Clear unused address entries") cleared the unused mac address entries, but introduced an out-of bounds mac address register programming bug -- After setting the secondary unicast mac addresses, the "reg" value has reached netdev_uc_count() + 1, thus we should only clear address entries if (addr < perfect_addr_number) Fixes: 9463c4455900 ("net: stmmac: dwmac1000: Clear unused address entries") Signed-off-by: Jisheng Zhang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 43a785f86c69..bc9b01376e80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -209,7 +209,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, reg++; } - while (reg <= perfect_addr_number) { + while (reg < perfect_addr_number) { writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); writel(0, ioaddr + GMAC_ADDR_LOW(reg)); reg++; From fdb6a094ba41e985d9fb14ae2bfc180e3e983720 Mon Sep 17 00:00:00 2001 From: Richard Palethorpe Date: Wed, 1 Apr 2020 12:06:39 +0200 Subject: [PATCH 11/42] slcan: Don't transmit uninitialized stack data in padding [ Upstream commit b9258a2cece4ec1f020715fe3554bc2e360f6264 ] struct can_frame contains some padding which is not explicitly zeroed in slc_bump. This uninitialized data will then be transmitted if the stack initialization hardening feature is not enabled (CONFIG_INIT_STACK_ALL). This commit just zeroes the whole struct including the padding. Signed-off-by: Richard Palethorpe Fixes: a1044e36e457 ("can: add slcan driver for serial/USB-serial CAN adapters") Reviewed-by: Kees Cook Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: security@kernel.org Cc: wg@grandegger.com Cc: mkl@pengutronix.de Cc: davem@davemloft.net Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/slcan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index a3664281a33f..4dfa459ef5c7 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -148,7 +148,7 @@ static void slc_bump(struct slcan *sl) u32 tmpid; char *cmd = sl->rbuff; - cf.can_id = 0; + memset(&cf, 0, sizeof(cf)); switch (*cmd) { case 'r': @@ -187,8 +187,6 @@ static void slc_bump(struct slcan *sl) else return; - *(u64 *) (&cf.data) = 0; /* clear payload */ - /* RTR frames may have a dlc > 0 but they never have any data bytes */ if (!(cf.can_id & CAN_RTR_FLAG)) { for (i = 0; i < cf.can_dlc; i++) { From 671331c11c39233e731bf5230a1c75b90ba55acf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 3 Apr 2020 16:13:21 +0100 Subject: [PATCH 12/42] tun: Don't put_page() for all negative return values from XDP program [ Upstream commit bee348907d19d654e8524d3a946dcd25b693aa7e ] When an XDP program is installed, tun_build_skb() grabs a reference to the current page fragment page if the program returns XDP_REDIRECT or XDP_TX. However, since tun_xdp_act() passes through negative return values from the XDP program, it is possible to trigger the error path by mistake and accidentally drop a reference to the fragments page without taking one, leading to a spurious free. This is believed to be the cause of some KASAN use-after-free reports from syzbot [1], although without a reproducer it is not possible to confirm whether this patch fixes the problem. Ensure that we only drop a reference to the fragments page if the XDP transmit or redirect operations actually fail. [1] https://syzkaller.appspot.com/bug?id=e76a6af1be4acd727ff6bbca669833f98cbf5d95 Cc: "David S. 
Miller" Cc: Alexei Starovoitov Cc: Daniel Borkmann CC: Eric Dumazet Acked-by: Jason Wang Fixes: 8ae1aff0b331 ("tuntap: split out XDP logic") Signed-off-by: Will Deacon Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 69f553a028ee..16f5cb249ed5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1715,8 +1715,12 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, alloc_frag->offset += buflen; } err = tun_xdp_act(tun, xdp_prog, &xdp, act); - if (err < 0) - goto err_xdp; + if (err < 0) { + if (act == XDP_REDIRECT || act == XDP_TX) + put_page(alloc_frag->page); + goto out; + } + if (err == XDP_REDIRECT) xdp_do_flush_map(); if (err != XDP_PASS) @@ -1730,8 +1734,6 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); -err_xdp: - put_page(alloc_frag->page); out: rcu_read_unlock(); local_bh_enable(); From 5249653d971d3f8ce4798dcadf1f38bd8572f0f9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 5 Apr 2020 09:50:22 +0300 Subject: [PATCH 13/42] mlxsw: spectrum_flower: Do not stop at FLOW_ACTION_VLAN_MANGLE [ Upstream commit ccfc569347f870830e7c7cf854679a06cf9c45b5 ] The handler for FLOW_ACTION_VLAN_MANGLE ends by returning whatever the lower-level function that it calls returns. If there are more actions lined up after this action, those are never offloaded. Fix by only bailing out when the called function returns an error. Fixes: a150201a70da ("mlxsw: spectrum: Add support for vlan modify TC action") Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index b607919c8ad0..498de6ef6870 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -123,9 +123,12 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, u8 prio = act->vlan.prio; u16 vid = act->vlan.vid; - return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, - act->id, vid, - proto, prio, extack); + err = mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + act->id, vid, + proto, prio, extack); + if (err) + return err; + break; } default: NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); From 74107d56d1e8e6ac5a061059941b7e2d03522df6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 4 Apr 2020 23:48:45 +0200 Subject: [PATCH 14/42] r8169: change back SG and TSO to be disabled by default [ Upstream commit 95099c569a9fdbe186a27447dfa8a5a0562d4b7f ] There has been a number of reports that using SG/TSO on different chip versions results in tx timeouts. However for a lot of people SG/TSO works fine. Therefore disable both features by default, but allow users to enable them. Use at own risk! 
Fixes: 93681cd7d94f ("r8169: enable HW csum and TSO") Signed-off-by: Heiner Kallweit Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 34 +++++++++++------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5ebfc3e66331..3bc6d1ef29ec 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -7167,12 +7167,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_napi_add(dev, &tp->napi, rtl8169_poll, NAPI_POLL_WEIGHT); - dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; - dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | - NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX; + dev->features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO | NETIF_F_HIGHDMA; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -7190,25 +7188,25 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; if (rtl_chip_supports_csum_v2(tp)) { - dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - dev->features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; + dev->hw_features |= NETIF_F_IPV6_CSUM; + dev->features |= NETIF_F_IPV6_CSUM; + } + + /* There has been a number of reports that using SG/TSO results in + * tx timeouts. However for a lot of people SG/TSO works fine. + * Therefore disable both features by default, but allow users to + * enable them. Use at own risk! + */ + if (rtl_chip_supports_csum_v2(tp)) { + dev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6; dev->gso_max_size = RTL_GSO_MAX_SIZE_V2; dev->gso_max_segs = RTL_GSO_MAX_SEGS_V2; } else { + dev->hw_features |= NETIF_F_SG | NETIF_F_TSO; dev->gso_max_size = RTL_GSO_MAX_SIZE_V1; dev->gso_max_segs = RTL_GSO_MAX_SEGS_V1; } - /* RTL8168e-vl and one RTL8168c variant are known to have a - * HW issue with TSO. - */ - if (tp->mac_version == RTL_GIGA_MAC_VER_34 || - tp->mac_version == RTL_GIGA_MAC_VER_22) { - dev->vlan_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG); - dev->hw_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG); - dev->features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG); - } - dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; From 5e331978200e379c525394aa0863cece328c3dee Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 22 Jan 2020 13:38:22 +0100 Subject: [PATCH 15/42] s390: prevent leaking kernel address in BEAR commit 0b38b5e1d0e2f361e418e05c179db05bb688bbd6 upstream. When userspace executes a syscall or gets interrupted, BEAR contains a kernel address when returning to userspace. This make it pretty easy to figure out where the kernel is mapped even with KASLR enabled. To fix this, add lpswe to lowcore and always execute it there, so userspace sees only the lowcore address of lpswe. For this we have to extend both critical_cleanup and the SWITCH_ASYNC macro to also check for lpswe addresses in lowcore. 
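
For reference, a stand-alone illustration of how the lowcore LPSWE instruction word is assembled (plain user-space C with a made-up 0x200 offset; the kernel helper added by the patch is gen_lpswe() in asm/setup.h, shown in the diff below): opcode 0xb2b2 in the upper halfword, base register 0 and a 12-bit lowcore displacement in the lower bits.

#include <stdio.h>
#include <stdint.h>

/* only a 12-bit displacement into lowcore can be encoded */
static uint32_t encode_lpswe(uint32_t lowcore_offset)
{
	return 0xb2b20000u | (lowcore_offset & 0xfffu);
}

int main(void)
{
	/* an offset of 0x200 yields the instruction word 0xb2b20200 */
	printf("%#x\n", encode_lpswe(0x200));
	return 0;
}
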
Fixes: b2d24b97b2a9 ("s390/kernel: add support for kernel address space layout randomization (KASLR)") Cc: # v5.2+ Reviewed-by: Gerald Schaefer Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/processor.h | 1 + arch/s390/include/asm/setup.h | 7 ++++ arch/s390/kernel/asm-offsets.c | 2 + arch/s390/kernel/entry.S | 65 ++++++++++++++++++------------- arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 3 ++ arch/s390/kernel/smp.c | 2 + arch/s390/mm/vmem.c | 4 ++ 9 files changed, 62 insertions(+), 27 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 237ee0c4169f..612ed3c6d581 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -141,7 +141,9 @@ struct lowcore { /* br %r1 trampoline */ __u16 br_r1_trampoline; /* 0x0400 */ - __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ + __u32 return_lpswe; /* 0x0402 */ + __u32 return_mcck_lpswe; /* 0x0406 */ + __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 51a0e4a2dc96..560d8b77b1d1 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -162,6 +162,7 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ .fpu.regs = (void *) init_task.thread.fpu.fprs, \ + .last_break = 1, \ } /* diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6dc6c4fbc8e2..1932088686a6 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,6 +8,7 @@ #include #include +#include #define EP_OFFSET 0x10008 #define EP_STRING "S390EP" @@ -157,6 +158,12 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +static inline u32 gen_lpswe(unsigned long addr) +{ + BUILD_BUG_ON(addr > 0xfff); + return 0xb2b20000 | addr; +} + #else /* __ASSEMBLY__ */ #define IPL_DEVICE (IPL_DEVICE_OFFSET) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 41ac4ad21311..b6628586ab70 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -125,6 +125,8 @@ int main(void) OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 270d1d145761..bc85987727f0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? - jnz 1f + jnz 2f lgr %r14,%r9 + cghi %r14,__LC_RETURN_LPSWE + je 0f slg %r14,BASED(.Lcritical_start) clg %r14,BASED(.Lcritical_length) - jhe 0f + jhe 1f +0: lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical tmhh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? + jnz 2f +1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? 
slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 2f + jnz 3f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f -1: UPDATE_VTIME %r14,%r15,\timer + j 4f +2: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -2: lg %r15,__LC_ASYNC_STACK # load async stack -3: la %r11,STACK_FRAME_OVERHEAD(%r15) +3: lg %r15,__LC_ASYNC_STACK # load async stack +4: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -401,7 +404,7 @@ ENTRY(system_call) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lsysc_done: # @@ -608,43 +611,50 @@ ENTRY(pgm_check_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK - lg %r12,__LC_CURRENT + srag %r11,%r10,12 + jnz 0f + /* if __LC_LAST_BREAK is < 4096, it contains one of + * the lpswe addresses in lowcore. Set it to 1 (initial state) + * to prevent leaking that address to userspace. + */ + lghi %r10,1 +0: lg %r12,__LC_CURRENT lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit - jnz 2f # -> fault in user space + jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) - jhe 0f + jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif -0: tmhh %r8,0x4000 # PER bit set in old PSW ? - jnz 1f # -> enabled, can't be a double fault +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -1: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + # CHECK_VMAP_STACK branches to stack_overflow or 5f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f +3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 aghi %r14,__TASK_thread # pointer to thread_struct lghi %r13,__LC_PGM_TDB tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 3f + jz 4f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: stg %r10,__THREAD_last_break(%r14) -4: lgr %r13,%r11 +4: stg %r10,__THREAD_last_break(%r14) +5: lgr %r13,%r11 la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use @@ -663,14 +673,14 @@ ENTRY(pgm_check_handler) stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 5f + jz 6f tmhh %r8,0x0001 # kernel per event ? 
jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -5: REENABLE_IRQS +6: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -775,7 +785,7 @@ ENTRY(io_int_handler) mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lio_done: # @@ -1214,7 +1224,7 @@ ENTRY(mcck_int_handler) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_MCCK_PSW + b __LC_RETURN_MCCK_LPSWE .Lmcck_panic: lg %r15,__LC_NODAT_STACK @@ -1271,6 +1281,8 @@ ENDPROC(stack_overflow) #endif ENTRY(cleanup_critical) + cghi %r9,__LC_RETURN_LPSWE + je .Lcleanup_lpswe #if IS_ENABLED(CONFIG_KVM) clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap jl 0f @@ -1424,6 +1436,7 @@ ENDPROC(cleanup_critical) mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) +.Lcleanup_lpswe: 1: lmg %r8,%r9,__LC_RETURN_PSW BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b0afec673f77..4e6299e2ca94 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -105,6 +105,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, p->thread.system_timer = 0; p->thread.hardirq_timer = 0; p->thread.softirq_timer = 0; + p->thread.last_break = 1; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b95e6fa34cc8..4366962f4930 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -73,6 +73,7 @@ #include #include #include +#include #include "entry.h" /* @@ -457,6 +458,8 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_index = 0; arch_spin_lock_setup(0); lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 06dddd7c4290..f468a10e5206 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; if (vdso_alloc_per_cpu(lc)) diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b403fa14847d..f810930aff42 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -415,6 +415,10 @@ void __init vmem_map_init(void) SET_MEMORY_RO | SET_MEMORY_X); __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + + /* we need lowcore executable for our LPSWE instructions */ + set_memory_x(0, 1); + pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } From 170f88a47b9ffa1afcbca384719faef8490abd31 Mon Sep 17 00:00:00 2001 From: "Jason A. 
Donenfeld" Date: Fri, 21 Feb 2020 21:10:37 +0100 Subject: [PATCH 16/42] random: always use batched entropy for get_random_u{32,64} commit 69efea712f5b0489e67d07565aad5c94e09a3e52 upstream. It turns out that RDRAND is pretty slow. Comparing these two constructions: for (i = 0; i < CHACHA_BLOCK_SIZE; i += sizeof(ret)) arch_get_random_long(&ret); and long buf[CHACHA_BLOCK_SIZE / sizeof(long)]; extract_crng((u8 *)buf); it amortizes out to 352 cycles per long for the top one and 107 cycles per long for the bottom one, on Coffee Lake Refresh, Intel Core i9-9880H. And importantly, the top one has the drawback of not benefiting from the real rng, whereas the bottom one has all the nice benefits of using our own chacha rng. As get_random_u{32,64} gets used in more places (perhaps beyond what it was originally intended for when it was introduced as get_random_{int,long} back in the md5 monstrosity era), it seems like it might be a good thing to strengthen its posture a tiny bit. Doing this should only be stronger and not any weaker because that pool is already initialized with a bunch of rdrand data (when available). This way, we get the benefits of the hardware rng as well as our own rng. Another benefit of this is that we no longer hit pitfalls of the recent stream of AMD bugs in RDRAND. One often used code pattern for various things is: do { val = get_random_u32(); } while (hash_table_contains_key(val)); That recent AMD bug rendered that pattern useless, whereas we're really very certain that chacha20 output will give pretty distributed numbers, no matter what. So, this simplification seems better both from a security perspective and from a performance perspective. Signed-off-by: Jason A. Donenfeld Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20200221201037.30231-1-Jason@zx2c4.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- drivers/char/random.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index a385fc1da1cb..8ff28c14af7e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2358,11 +2358,11 @@ struct batched_entropy { /* * Get a random word for internal kernel use only. The quality of the random - * number is either as good as RDRAND or as good as /dev/urandom, with the - * goal of being quite fast and not depleting entropy. In order to ensure + * number is good as /dev/urandom, but there is no backtrack protection, with + * the goal of being quite fast and not depleting entropy. In order to ensure * that the randomness provided by this function is okay, the function - * wait_for_random_bytes() should be called and return 0 at least once - * at any point prior. + * wait_for_random_bytes() should be called and return 0 at least once at any + * point prior. 
*/ static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), @@ -2375,15 +2375,6 @@ u64 get_random_u64(void) struct batched_entropy *batch; static void *previous; -#if BITS_PER_LONG == 64 - if (arch_get_random_long((unsigned long *)&ret)) - return ret; -#else - if (arch_get_random_long((unsigned long *)&ret) && - arch_get_random_long((unsigned long *)&ret + 1)) - return ret; -#endif - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u64); @@ -2408,9 +2399,6 @@ u32 get_random_u32(void) struct batched_entropy *batch; static void *previous; - if (arch_get_random_int(&ret)) - return ret; - warn_unseeded_randomness(&previous); batch = raw_cpu_ptr(&batched_entropy_u32); From 94d2d84bcafa3f491c55583154fd543a9b62f836 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 5 Mar 2020 13:24:01 -0800 Subject: [PATCH 17/42] usb: dwc3: gadget: Wrap around when skip TRBs commit 2dedea035ae82c5af0595637a6eda4655532b21e upstream. When skipping TRBs, we need to account for wrapping around the ring buffer and not modifying some invalid TRBs. Without this fix, dwc3 won't be able to check for available TRBs. Cc: stable Fixes: 7746a8dfb3f9 ("usb: dwc3: gadget: extract dwc3_gadget_ep_skip_trbs()") Signed-off-by: Thinh Nguyen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6ac02ba5e4a1..09d6b11246c9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1518,7 +1518,7 @@ static void dwc3_gadget_ep_skip_trbs(struct dwc3_ep *dep, struct dwc3_request *r for (i = 0; i < req->num_trbs; i++) { struct dwc3_trb *trb; - trb = req->trb + i; + trb = &dep->trb_pool[dep->trb_dequeue]; trb->ctrl &= ~DWC3_TRB_CTRL_HWO; dwc3_ep_inc_deq(dep); } From 8b0f08036659cf67b4d22cd0a5ad96db5dec2057 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 30 Jan 2020 22:16:40 -0800 Subject: [PATCH 18/42] uapi: rename ext2_swab() to swab() and share globally in swab.h commit d5767057c9a76a29f073dad66b7fa12a90e8c748 upstream. ext2_swab() is defined locally in lib/find_bit.c However it is not specific to ext2, neither to bitmaps. There are many potential users of it, so rename it to just swab() and move to include/uapi/linux/swab.h ABI guarantees that size of unsigned long corresponds to BITS_PER_LONG, therefore drop unneeded cast. 
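
For illustration only, a user-space approximation of the promoted helper (glibc's <byteswap.h> stands in for the kernel's swab machinery, and the name is changed to avoid clashing with it): byte-swap a full unsigned long, picking the 32- or 64-bit swap to match the word size.

#include <stdio.h>
#include <byteswap.h>
#include <limits.h>

static unsigned long swab_ulong(unsigned long x)
{
#if ULONG_MAX > 0xffffffffUL
	return bswap_64(x);		/* 64-bit longs */
#else
	return bswap_32(x);		/* 32-bit longs */
#endif
}

int main(void)
{
	unsigned long v = (unsigned long)0x0102030405060708ULL;

	/* on a 64-bit build this prints 0x807060504030201 */
	printf("%#lx\n", swab_ulong(v));
	return 0;
}
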
Link: http://lkml.kernel.org/r/20200103202846.21616-1-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/swab.h | 1 + include/uapi/linux/swab.h | 10 ++++++++++ lib/find_bit.c | 16 ++-------------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/linux/swab.h b/include/linux/swab.h index e466fd159c85..bcff5149861a 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -7,6 +7,7 @@ # define swab16 __swab16 # define swab32 __swab32 # define swab64 __swab64 +# define swab __swab # define swahw32 __swahw32 # define swahb32 __swahb32 # define swab16p __swab16p diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 23cd84868cc3..fa7f97da5b76 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -4,6 +4,7 @@ #include #include +#include #include /* @@ -132,6 +133,15 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) __fswab64(x)) #endif +static __always_inline unsigned long __swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return __swab64(y); +#else /* BITS_PER_LONG == 32 */ + return __swab32(y); +#endif +} + /** * __swahw32 - return a word-swapped 32-bit value * @x: value to wordswap diff --git a/lib/find_bit.c b/lib/find_bit.c index 5c51eb45178a..4e68490fa703 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -149,18 +149,6 @@ EXPORT_SYMBOL(find_last_bit); #ifdef __BIG_ENDIAN -/* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swab(const unsigned long y) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64((u64) y); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32((u32) y); -#else -#error BITS_PER_LONG not defined -#endif -} - #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) static inline unsigned long _find_next_bit_le(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, @@ -177,7 +165,7 @@ static inline unsigned long _find_next_bit_le(const unsigned long *addr1, tmp ^= invert; /* Handle 1st word. */ - tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); + tmp &= swab(BITMAP_FIRST_WORD_MASK(start)); start = round_down(start, BITS_PER_LONG); while (!tmp) { @@ -191,7 +179,7 @@ static inline unsigned long _find_next_bit_le(const unsigned long *addr1, tmp ^= invert; } - return min(start + __ffs(ext2_swab(tmp)), nbits); + return min(start + __ffs(swab(tmp)), nbits); } #endif From ea84a26ab63349e48537bace81573520a2152634 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Apr 2020 21:04:23 -0700 Subject: [PATCH 19/42] slub: improve bit diffusion for freelist ptr obfuscation commit 1ad53d9fa3f6168ebcf48a50e08b170432da2257 upstream. Under CONFIG_SLAB_FREELIST_HARDENED=y, the obfuscation was relatively weak in that the ptr and ptr address were usually so close that the first XOR would result in an almost entirely 0-byte value[1], leaving most of the "secret" number ultimately being stored after the third XOR. A single blind memory content exposure of the freelist was generally sufficient to learn the secret. Add a swab() call to mix bits a little more. This is a cheap way (1 cycle) to make attacks need more than a single exposure to learn the secret (or to know _where_ the exposure is in memory). 
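
A quick user-space model of the two formulas, assuming a 64-bit build (this is not the kernel code, which additionally passes ptr_addr through kasan_reset_tag(); the constants are copied from the first row of the "before" dump below):

#include <stdio.h>
#include <byteswap.h>

int main(void)
{
	unsigned long secret   = 0x86528eb656b3b59dUL;	/* s->random */
	unsigned long ptr      = 0xffff90c22e019020UL;	/* next free object */
	unsigned long ptr_addr = 0xffff90c22e019000UL;	/* slot holding it */

	/* old scheme: ptr ^ ptr_addr is almost all zero bytes, so this
	 * prints 0x86528eb656b3b5bd, one bit flip away from the secret
	 */
	printf("old: %#lx\n", ptr ^ secret ^ ptr_addr);

	/* new scheme: swab(ptr_addr) spreads the differing low bits into
	 * the high bytes, so the stored value no longer resembles the secret
	 */
	printf("new: %#lx\n", ptr ^ secret ^ bswap_64(ptr_addr));
	return 0;
}
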
kmalloc-32 freelist walk, before: ptr ptr_addr stored value secret ffff90c22e019020@ffff90c22e019000 is 86528eb656b3b5bd (86528eb656b3b59d) ffff90c22e019040@ffff90c22e019020 is 86528eb656b3b5fd (86528eb656b3b59d) ffff90c22e019060@ffff90c22e019040 is 86528eb656b3b5bd (86528eb656b3b59d) ffff90c22e019080@ffff90c22e019060 is 86528eb656b3b57d (86528eb656b3b59d) ffff90c22e0190a0@ffff90c22e019080 is 86528eb656b3b5bd (86528eb656b3b59d) ... after: ptr ptr_addr stored value secret ffff9eed6e019020@ffff9eed6e019000 is 793d1135d52cda42 (86528eb656b3b59d) ffff9eed6e019040@ffff9eed6e019020 is 593d1135d52cda22 (86528eb656b3b59d) ffff9eed6e019060@ffff9eed6e019040 is 393d1135d52cda02 (86528eb656b3b59d) ffff9eed6e019080@ffff9eed6e019060 is 193d1135d52cdae2 (86528eb656b3b59d) ffff9eed6e0190a0@ffff9eed6e019080 is f93d1135d52cdac2 (86528eb656b3b59d) [1] https://blog.infosectcbr.com.au/2020/03/weaknesses-in-linux-kernel-heap.html Fixes: 2482ddec670f ("mm: add SLUB free list pointer obfuscation") Reported-by: Silvio Cesare Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Link: http://lkml.kernel.org/r/202003051623.AF4F8CB@keescook Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 59ed00be02cb..af44807d5b05 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -261,7 +261,7 @@ static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr, * freepointer to be restored incorrectly. */ return (void *)((unsigned long)ptr ^ s->random ^ - (unsigned long)kasan_reset_tag((void *)ptr_addr)); + swab((unsigned long)kasan_reset_tag((void *)ptr_addr))); #else return ptr; #endif From dfa210cf9f949a2c8a0686a3095093419eea2e84 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 1 Apr 2020 21:02:25 -0700 Subject: [PATCH 20/42] tools/accounting/getdelays.c: fix netlink attribute length commit 4054ab64e29bb05b3dfe758fff3c38a74ba753bb upstream. A recent change to the netlink code: 6e237d099fac ("netlink: Relax attr validation for fixed length types") logs a warning when programs send messages with invalid attributes (e.g., wrong length for a u32). Yafang reported this error message for tools/accounting/getdelays.c. send_cmd() is wrongly adding 1 to the attribute length. As noted in include/uapi/linux/netlink.h nla_len should be NLA_HDRLEN + payload length, so drop the +1. 
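
A minimal sketch of the rule the fix restores (the helper below is illustrative, not a copy of getdelays.c, which writes the attribute into its own preassembled message buffer): nla_len covers the 4-byte attribute header plus the payload, with no extra "+ 1".

#include <string.h>
#include <linux/netlink.h>

/* write one attribute at "na" and return its aligned size */
static int put_attr(struct nlattr *na, unsigned short type,
		    const void *data, unsigned short len)
{
	na->nla_type = type;
	na->nla_len = NLA_HDRLEN + len;		/* header + payload, no "+ 1" */
	memcpy((char *)na + NLA_HDRLEN, data, len);
	return NLA_ALIGN(na->nla_len);
}
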
Fixes: 9e06d3f9f6b1 ("per task delay accounting taskstats interface: documentation fix") Reported-by: Yafang Shao Signed-off-by: David Ahern Signed-off-by: Andrew Morton Tested-by: Yafang Shao Cc: Johannes Berg Cc: Shailabh Nagar Cc: Link: http://lkml.kernel.org/r/20200327173111.63922-1-dsahern@kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- tools/accounting/getdelays.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 8cb504d30384..5ef1c15e88ad 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, msg.g.version = 0x1; na = (struct nlattr *) GENLMSG_DATA(&msg); na->nla_type = nla_type; - na->nla_len = nla_len + 1 + NLA_HDRLEN; + na->nla_len = nla_len + NLA_HDRLEN; memcpy(NLA_DATA(na), nla_data, nla_len); msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); From 41a0cfa05c053002f6a8e2d41c5dc3e21da754b6 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 5 Mar 2020 21:58:20 +0100 Subject: [PATCH 21/42] hwrng: imx-rngc - fix an error path commit 47a1f8e8b3637ff5f7806587883d7d94068d9ee8 upstream. Make sure that the rngc interrupt is masked if the rngc self test fails. Self test failure means that probe fails as well. Interrupts should be masked in this case, regardless of the error. Cc: stable@vger.kernel.org Fixes: 1d5449445bd0 ("hwrng: mx-rngc - add a driver for Freescale RNGC") Reviewed-by: PrasannaKumar Muralidharan Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/imx-rngc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index 30cf00f8e9a0..0576801944fd 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -105,8 +105,10 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) return -ETIMEDOUT; } - if (rngc->err_reg != 0) + if (rngc->err_reg != 0) { + imx_rngc_irq_mask_clear(rngc); return -EIO; + } return 0; } From 9da847d65f3710270a955bc40619b247421cd729 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Apr 2020 17:48:33 +0200 Subject: [PATCH 22/42] ACPI: PM: Add acpi_[un]register_wakeup_handler() commit ddfd9dcf270ce23ed1985b66fcfa163920e2e1b8 upstream. Since commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") the SCI triggering without there being a wakeup cause recognized by the ACPI sleep code will no longer wakeup the system. This works as intended, but this is a problem for devices where the SCI is shared with another device which is also a wakeup source. In the past these, from the pov of the ACPI sleep code, spurious SCIs would still cause a wakeup so the wakeup from the device sharing the interrupt would actually wakeup the system. This now no longer works. This is a problem on e.g. Bay Trail-T and Cherry Trail devices where some peripherals (typically the XHCI controller) can signal a Power Management Event (PME) to the Power Management Controller (PMC) to wakeup the system, this uses the same interrupt as the SCI. These wakeups are handled through a special INT0002 ACPI device which checks for events in the GPE0a_STS for this and takes care of acking the PME so that the shared interrupt stops triggering. The change to the ACPI sleep code to ignore the spurious SCI, causes the system to no longer wakeup on these PME events. 
To make things worse this means that the INT0002 device driver interrupt handler will no longer run, causing the PME to not get cleared and resulting in the system hanging. Trying to wakeup the system after such a PME through e.g. the power button no longer works. Add an acpi_register_wakeup_handler() function which registers a handler to be called from acpi_s2idle_wake() and when the handler returns true, return true from acpi_s2idle_wake(). The INT0002 driver will use this mechanism to check the GPE0a_STS register from acpi_s2idle_wake() and to tell the system to wakeup if a PME is signaled in the register. Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Cc: 5.4+ # 5.4+ Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/sleep.c | 4 +++ drivers/acpi/sleep.h | 1 + drivers/acpi/wakeup.c | 81 +++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 5 +++ 4 files changed, 91 insertions(+) diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index ce59a3f32eac..abd39cc5ff88 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1009,6 +1009,10 @@ static bool acpi_s2idle_wake(void) if (acpi_any_fixed_event_status_set()) return true; + /* Check wakeups from drivers sharing the SCI. */ + if (acpi_check_wakeup_handlers()) + return true; + /* * If there are no EC events to process and at least one of the * other enabled GPEs is active, the wakeup is regarded as a diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 41675d24a9bc..3d90480ce1b1 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -2,6 +2,7 @@ extern void acpi_enable_wakeup_devices(u8 sleep_state); extern void acpi_disable_wakeup_devices(u8 sleep_state); +extern bool acpi_check_wakeup_handlers(void); extern struct list_head acpi_wakeup_device_list; extern struct mutex acpi_device_lock; diff --git a/drivers/acpi/wakeup.c b/drivers/acpi/wakeup.c index 9614126bf56e..90c40f992e13 100644 --- a/drivers/acpi/wakeup.c +++ b/drivers/acpi/wakeup.c @@ -12,6 +12,15 @@ #include "internal.h" #include "sleep.h" +struct acpi_wakeup_handler { + struct list_head list_node; + bool (*wakeup)(void *context); + void *context; +}; + +static LIST_HEAD(acpi_wakeup_handler_head); +static DEFINE_MUTEX(acpi_wakeup_handler_mutex); + /* * We didn't lock acpi_device_lock in the file, because it invokes oops in * suspend/resume and isn't really required as this is called in S-state. At @@ -96,3 +105,75 @@ int __init acpi_wakeup_device_init(void) mutex_unlock(&acpi_device_lock); return 0; } + +/** + * acpi_register_wakeup_handler - Register wakeup handler + * @wake_irq: The IRQ through which the device may receive wakeups + * @wakeup: Wakeup-handler to call when the SCI has triggered a wakeup + * @context: Context to pass to the handler when calling it + * + * Drivers which may share an IRQ with the SCI can use this to register + * a handler which returns true when the device they are managing wants + * to trigger a wakeup. + */ +int acpi_register_wakeup_handler(int wake_irq, bool (*wakeup)(void *context), + void *context) +{ + struct acpi_wakeup_handler *handler; + + /* + * If the device is not sharing its IRQ with the SCI, there is no + * need to register the handler. 
+ */ + if (!acpi_sci_irq_valid() || wake_irq != acpi_sci_irq) + return 0; + + handler = kmalloc(sizeof(*handler), GFP_KERNEL); + if (!handler) + return -ENOMEM; + + handler->wakeup = wakeup; + handler->context = context; + + mutex_lock(&acpi_wakeup_handler_mutex); + list_add(&handler->list_node, &acpi_wakeup_handler_head); + mutex_unlock(&acpi_wakeup_handler_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(acpi_register_wakeup_handler); + +/** + * acpi_unregister_wakeup_handler - Unregister wakeup handler + * @wakeup: Wakeup-handler passed to acpi_register_wakeup_handler() + * @context: Context passed to acpi_register_wakeup_handler() + */ +void acpi_unregister_wakeup_handler(bool (*wakeup)(void *context), + void *context) +{ + struct acpi_wakeup_handler *handler; + + mutex_lock(&acpi_wakeup_handler_mutex); + list_for_each_entry(handler, &acpi_wakeup_handler_head, list_node) { + if (handler->wakeup == wakeup && handler->context == context) { + list_del(&handler->list_node); + kfree(handler); + break; + } + } + mutex_unlock(&acpi_wakeup_handler_mutex); +} +EXPORT_SYMBOL_GPL(acpi_unregister_wakeup_handler); + +bool acpi_check_wakeup_handlers(void) +{ + struct acpi_wakeup_handler *handler; + + /* No need to lock, nothing else is running when we're called. */ + list_for_each_entry(handler, &acpi_wakeup_handler_head, list_node) { + if (handler->wakeup(handler->context)) + return true; + } + + return false; +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 8b4e516bac00..ce29a014e591 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -473,6 +473,11 @@ void __init acpi_nvs_nosave_s3(void); void __init acpi_sleep_no_blacklist(void); #endif /* CONFIG_PM_SLEEP */ +int acpi_register_wakeup_handler( + int wake_irq, bool (*wakeup)(void *context), void *context); +void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context); + struct acpi_osc_context { char *uuid_str; /* UUID string */ int rev; From e30a21c6fea55892a3bee813e6e60e5f5547841e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 3 Apr 2020 17:48:34 +0200 Subject: [PATCH 23/42] platform/x86: intel_int0002_vgpio: Use acpi_register_wakeup_handler() commit 767191db8220db29f78c031f4d27375173c336d5 upstream. The Power Management Events (PMEs) the INT0002 driver listens for get signalled by the Power Management Controller (PMC) using the same IRQ as used for the ACPI SCI. Since commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") the SCI triggering, without there being a wakeup cause recognized by the ACPI sleep code, will no longer wakeup the system. This breaks PMEs / wakeups signalled to the INT0002 driver, the system never leaves the s2idle_loop() now. Use acpi_register_wakeup_handler() to register a function which checks the GPE0a_STS register for a PME and trigger a wakeup when a PME has been signalled. Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system") Cc: 5.4+ # 5.4+ Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel_int0002_vgpio.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c index af233b7b77f2..29d12f2f59e0 100644 --- a/drivers/platform/x86/intel_int0002_vgpio.c +++ b/drivers/platform/x86/intel_int0002_vgpio.c @@ -127,6 +127,14 @@ static irqreturn_t int0002_irq(int irq, void *data) return IRQ_HANDLED; } +static bool int0002_check_wake(void *data) +{ + u32 gpe_sts_reg; + + gpe_sts_reg = inl(GPE0A_STS_PORT); + return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT); +} + static struct irq_chip int0002_byt_irqchip = { .name = DRV_NAME, .irq_ack = int0002_irq_ack, @@ -220,6 +228,7 @@ static int int0002_probe(struct platform_device *pdev) gpiochip_set_chained_irqchip(chip, irq_chip, irq, NULL); + acpi_register_wakeup_handler(irq, int0002_check_wake, NULL); device_init_wakeup(dev, true); return 0; } @@ -227,6 +236,7 @@ static int int0002_probe(struct platform_device *pdev) static int int0002_remove(struct platform_device *pdev) { device_init_wakeup(&pdev->dev, false); + acpi_unregister_wakeup_handler(int0002_check_wake, NULL); return 0; } From 9351dee1cc242dceb5feab3fc7d32e7900065e1c Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 6 Mar 2020 23:29:27 +0100 Subject: [PATCH 24/42] ASoC: jz4740-i2s: Fix divider written at incorrect offset in register commit 9401d5aa328e64617d87abd59af1c91cace4c3e4 upstream. The 4-bit divider value was written at offset 8, while the jz4740 programming manual locates it at offset 0. Fixes: 26b0aad80a86 ("ASoC: jz4740: Add dynamic sampling rate support to jz4740-i2s") Signed-off-by: Paul Cercueil Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200306222931.39664-2-paul@crapouillou.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/jz4740/jz4740-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/jz4740/jz4740-i2s.c b/sound/soc/jz4740/jz4740-i2s.c index 13408de34055..0bbd86390be5 100644 --- a/sound/soc/jz4740/jz4740-i2s.c +++ b/sound/soc/jz4740/jz4740-i2s.c @@ -83,7 +83,7 @@ #define JZ_AIC_I2S_STATUS_BUSY BIT(2) #define JZ_AIC_CLK_DIV_MASK 0xf -#define I2SDIV_DV_SHIFT 8 +#define I2SDIV_DV_SHIFT 0 #define I2SDIV_DV_MASK (0xf << I2SDIV_DV_SHIFT) #define I2SDIV_IDV_SHIFT 8 #define I2SDIV_IDV_MASK (0xf << I2SDIV_IDV_SHIFT) From cd38d8b231f16a4825597081794fa2253836912d Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 26 Mar 2020 12:38:14 -0400 Subject: [PATCH 25/42] IB/hfi1: Call kobject_put() when kobject_init_and_add() fails commit dfb5394f804ed4fcea1fc925be275a38d66712ab upstream. When kobject_init_and_add() returns an error in the function hfi1_create_port_files(), the function kobject_put() is not called for the corresponding kobject, which potentially leads to memory leak. This patch fixes the issue by calling kobject_put() even if kobject_init_and_add() fails. 
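For reference, the kobject API contract this relies on: kobject_init_and_add() takes a reference during init and may have allocated the sysfs name even when it ultimately fails, so the caller must still call kobject_put() on the error path and let the ktype's release callback free everything. A minimal sketch with hypothetical ktype/parent/name, not the hfi1 code:

        struct kobject *kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
        int ret;

        if (!kobj)
                return -ENOMEM;

        ret = kobject_init_and_add(kobj, &my_ktype, parent, "my_dir");
        if (ret) {
                /* the init reference is still held; put() triggers release */
                kobject_put(kobj);
                return ret;
        }
        kobject_uevent(kobj, KOBJ_ADD);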
Cc: Link: https://lore.kernel.org/r/20200326163813.21129.44280.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sysfs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 90f62c4bddba..37e9861adc07 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -674,7 +674,11 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sc2vl sysfs info, (err %d) port %u\n", ret, port_num); - goto bail; + /* + * Based on the documentation for kobject_init_and_add(), the + * caller should call kobject_put even if this call fails. + */ + goto bail_sc2vl; } kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD); @@ -684,7 +688,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping sl2sc sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sc2vl; + goto bail_sl2sc; } kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD); @@ -694,7 +698,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping vl2mtu sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_sl2sc; + goto bail_vl2mtu; } kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD); @@ -704,7 +708,7 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, dd_dev_err(dd, "Skipping Congestion Control sysfs info, (err %d) port %u\n", ret, port_num); - goto bail_vl2mtu; + goto bail_cc; } kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); @@ -742,7 +746,6 @@ int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, kobject_put(&ppd->sl2sc_kobj); bail_sc2vl: kobject_put(&ppd->sc2vl_kobj); -bail: return ret; } From ccc2b645de20e0e175c7832630d65ad52b7bebc0 Mon Sep 17 00:00:00 2001 From: Kaike Wan Date: Thu, 26 Mar 2020 12:38:07 -0400 Subject: [PATCH 26/42] IB/hfi1: Fix memory leaks in sysfs registration and unregistration commit 5c15abc4328ad696fa61e2f3604918ed0c207755 upstream. When the hfi1 driver is unloaded, kmemleak will report the following issue: unreferenced object 0xffff8888461a4c08 (size 8): comm "kworker/0:0", pid 5, jiffies 4298601264 (age 2047.134s) hex dump (first 8 bytes): 73 64 6d 61 30 00 ff ff sdma0... backtrace: [<00000000311a6ef5>] kvasprintf+0x62/0xd0 [<00000000ade94d9f>] kobject_set_name_vargs+0x1c/0x90 [<0000000060657dbb>] kobject_init_and_add+0x5d/0xb0 [<00000000346fe72b>] 0xffffffffa0c5ecba [<000000006cfc5819>] 0xffffffffa0c866b9 [<0000000031c65580>] 0xffffffffa0c38e87 [<00000000e9739b3f>] local_pci_probe+0x41/0x80 [<000000006c69911d>] work_for_cpu_fn+0x16/0x20 [<00000000601267b5>] process_one_work+0x171/0x380 [<0000000049a0eefa>] worker_thread+0x1d1/0x3f0 [<00000000909cf2b9>] kthread+0xf8/0x130 [<0000000058f5f874>] ret_from_fork+0x35/0x40 This patch fixes the issue by: - Releasing dd->per_sdma[i].kobject in hfi1_unregister_sysfs(). - This will fix the memory leak. - Calling kobject_put() to unwind operations only for those entries in dd->per_sdma[] whose operations have succeeded (including the current one that has just failed) in hfi1_verbs_register_sysfs(). 
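Spelling out the unwind idiom the fix below switches to: if kobject_init_and_add() fails at index i, entries 0..i-1 were fully added and entry i was at least initialized, so all of them, including the failing one, need kobject_put(). Generic sketch; the array and ktype names are illustrative, not the hfi1 structures:

        for (i = 0; i < n; i++) {
                ret = kobject_init_and_add(&objs[i].kobj, &my_ktype,
                                           parent, "entry%d", i);
                if (ret)
                        goto unwind;
        }
        return 0;

unwind:
        for (; i >= 0; i--)
                kobject_put(&objs[i].kobj);     /* also removes the sysfs entry if it was added */
        return ret;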
Cc: Fixes: 0cb2aa690c7e ("IB/hfi1: Add sysfs interface for affinity setup") Link: https://lore.kernel.org/r/20200326163807.21129.27371.stgit@awfm-01.aw.intel.com Reviewed-by: Mike Marciniszyn Signed-off-by: Kaike Wan Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/hfi1/sysfs.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 37e9861adc07..074ec71772d2 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -856,8 +856,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) return 0; bail: - for (i = 0; i < dd->num_sdma; i++) - kobject_del(&dd->per_sdma[i].kobj); + /* + * The function kobject_put() will call kobject_del() if the kobject + * has been added successfully. The sysfs files created under the + * kobject directory will also be removed during the process. + */ + for (; i >= 0; i--) + kobject_put(&dd->per_sdma[i].kobj); return ret; } @@ -870,6 +875,10 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; + /* Unwind operations in hfi1_verbs_register_sysfs() */ + for (i = 0; i < dd->num_sdma; i++) + kobject_put(&dd->per_sdma[i].kobj); + for (i = 0; i < dd->num_pports; i++) { ppd = &dd->pport[i]; From 4ac80b02f10dc027d93ab246e4736f6a739f248c Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 5 Mar 2020 14:38:41 +0200 Subject: [PATCH 27/42] IB/mlx5: Replace tunnel mpls capability bits for tunnel_offloads commit 41e684ef3f37ce6e5eac3fb5b9c7c1853f4b0447 upstream. Until now the flex parser capability was used in ib_query_device() to indicate tunnel_offloads_caps support for mpls_over_gre/mpls_over_udp. Newer devices and firmware will have configurations with the flexparser but without mpls support. Testing for the flex parser capability was a mistake, the tunnel_stateless capability was intended for detecting mpls and was introduced at the same time as the flex parser capability. Otherwise userspace will be incorrectly informed that a future device supports MPLS when it does not. 
Link: https://lore.kernel.org/r/20200305123841.196086-1-leon@kernel.org Cc: # 4.17 Fixes: e818e255a58d ("IB/mlx5: Expose MPLS related tunneling offloads") Signed-off-by: Alex Vesker Reviewed-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 6 ++---- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0a160fd1383a..4f44a731a48e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1181,12 +1181,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_GRE) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; - if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_UDP) + if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 0cdc8d12785a..acd859ea09d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -857,7 +857,11 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp_csum[0x1]; u8 swp_lso[0x1]; u8 cqe_checksum_full[0x1]; - u8 reserved_at_24[0x5]; + u8 tunnel_stateless_geneve_tx[0x1]; + u8 tunnel_stateless_mpls_over_udp[0x1]; + u8 tunnel_stateless_mpls_over_gre[0x1]; + u8 tunnel_stateless_vxlan_gpe[0x1]; + u8 tunnel_stateless_ipv4_over_vxlan[0x1]; u8 tunnel_stateless_ip_over_ip[0x1]; u8 reserved_at_2a[0x6]; u8 max_vxlan_udp_ports[0x8]; From cf70056626733696dfc008036641721788ad6c9a Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 11 Dec 2019 10:53:36 +0800 Subject: [PATCH 28/42] ARM: imx: Enable ARM_ERRATA_814220 for i.MX6UL and i.MX7D commit 4562fa4c86c92a2df635fe0697c9e06379738741 upstream. ARM_ERRATA_814220 has below description: The v7 ARM states that all cache and branch predictor maintenance operations that do not specify an address execute, relative to each other, in program order. However, because of this erratum, an L2 set/way cache maintenance operation can overtake an L1 set/way cache maintenance operation. This ERRATA only affected the Cortex-A7 and present in r0p2, r0p3, r0p4, r0p5. i.MX6UL and i.MX7D have Cortex-A7 r0p5 inside, need to enable ARM_ERRATA_814220 for proper workaround. Signed-off-by: Anson Huang Signed-off-by: Shawn Guo Cc: Christian Eggers Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 593bf1519608..4326c8f53462 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -520,6 +520,7 @@ config SOC_IMX6UL bool "i.MX6 UltraLite support" select PINCTRL_IMX6UL select SOC_IMX6 + select ARM_ERRATA_814220 help This enables support for Freescale i.MX6 UltraLite processor. @@ -556,6 +557,7 @@ config SOC_IMX7D select PINCTRL_IMX7D select SOC_IMX7D_CA7 if ARCH_MULTI_V7 select SOC_IMX7D_CM4 if ARM_SINGLE_ARMV7M + select ARM_ERRATA_814220 help This enables support for Freescale i.MX7 Dual processor. 
From 7dafb2c6fb46166af9fa4c3c57493a48f6ff2950 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 22:51:39 +0100 Subject: [PATCH 29/42] ARM: imx: only select ARM_ERRATA_814220 for ARMv7-A commit c74067a0f776c1d695a713a4388c3b6a094ee40a upstream. i.MX7D is supported for either the v7-A or the v7-M cores, but the latter causes a warning: WARNING: unmet direct dependencies detected for ARM_ERRATA_814220 Depends on [n]: CPU_V7 [=n] Selected by [y]: - SOC_IMX7D [=y] && ARCH_MXC [=y] && (ARCH_MULTI_V7 [=n] || ARM_SINGLE_ARMV7M [=y]) Make the select statement conditional. Fixes: 4562fa4c86c9 ("ARM: imx: Enable ARM_ERRATA_814220 for i.MX6UL and i.MX7D") Signed-off-by: Arnd Bergmann Signed-off-by: Shawn Guo Cc: Christian Eggers Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 4326c8f53462..95584ee02b55 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -557,7 +557,7 @@ config SOC_IMX7D select PINCTRL_IMX7D select SOC_IMX7D_CA7 if ARCH_MULTI_V7 select SOC_IMX7D_CM4 if ARM_SINGLE_ARMV7M - select ARM_ERRATA_814220 + select ARM_ERRATA_814220 if ARCH_MULTI_V7 help This enables support for Freescale i.MX7 Dual processor. From 56385788f7f6ce5389030a7cc72c54a461b85224 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 20 Dec 2019 09:34:04 -0500 Subject: [PATCH 30/42] ceph: remove the extra slashes in the server path commit 4fbc0c711b2464ee1551850b85002faae0b775d5 upstream. It's possible to pass the mount helper a server path that has more than one contiguous slash character. For example: $ mount -t ceph 192.168.195.165:40176:/// /mnt/cephfs/ In the MDS server side the extra slashes of the server path will be treated as snap dir, and then we can get the following debug logs: ceph: mount opening path // ceph: open_root_inode opening '//' ceph: fill_trace 0000000059b8a3bc is_dentry 0 is_target 1 ceph: alloc_inode 00000000dc4ca00b ceph: get_inode created new inode 00000000dc4ca00b 1.ffffffffffffffff ino 1 ceph: get_inode on 1=1.ffffffffffffffff got 00000000dc4ca00b And then when creating any new file or directory under the mount point, we can hit the following BUG_ON in ceph_fill_trace(): BUG_ON(ceph_snap(dir) != dvino.snap); Have the client ignore the extra slashes in the server path when mounting. This will also canonicalize the path, so that identical mounts can be consilidated. 1) "//mydir1///mydir//" 2) "/mydir1/mydir" 3) "/mydir1/mydir/" Regardless of the internal treatment of these paths, the kernel still stores the original string including the leading '/' for presentation to userland. 
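As a user-space illustration of the comparison this change is after (the in-kernel helpers added here, and reworked in the next patch, handle allocation and the leading '/' differently), collapsing duplicate and trailing slashes is what makes the three spellings above compare equal:

        /* "//mydir1///mydir//", "/mydir1/mydir" and "/mydir1/mydir/"
         * all normalize to "/mydir1/mydir". */
        static void normalize(char *path)
        {
                int i, j = 0;

                for (i = 0; path[i] != '\0'; i++) {
                        if (path[i] != '/' || j == 0 || path[j - 1] != '/')
                                path[j++] = path[i];
                }
                if (j > 1 && path[j - 1] == '/')
                        j--;
                path[j] = '\0';
        }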
URL: https://tracker.ceph.com/issues/42771 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 120 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 101 insertions(+), 19 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 62fc7d46032e..54b50452ec1e 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -106,7 +106,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } - static int ceph_sync_fs(struct super_block *sb, int wait) { struct ceph_fs_client *fsc = ceph_sb_to_client(sb); @@ -430,6 +429,73 @@ static int strcmp_null(const char *s1, const char *s2) return strcmp(s1, s2); } +/** + * path_remove_extra_slash - Remove the extra slashes in the server path + * @server_path: the server path and could be NULL + * + * Return NULL if the path is NULL or only consists of "/", or a string + * without any extra slashes including the leading slash(es) and the + * slash(es) at the end of the server path, such as: + * "//dir1////dir2///" --> "dir1/dir2" + */ +static char *path_remove_extra_slash(const char *server_path) +{ + const char *path = server_path; + const char *cur, *end; + char *buf, *p; + int len; + + /* if the server path is omitted */ + if (!path) + return NULL; + + /* remove all the leading slashes */ + while (*path == '/') + path++; + + /* if the server path only consists of slashes */ + if (*path == '\0') + return NULL; + + len = strlen(path); + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + end = path + len; + p = buf; + do { + cur = strchr(path, '/'); + if (!cur) + cur = end; + + len = cur - path; + + /* including one '/' */ + if (cur != end) + len += 1; + + memcpy(p, path, len); + p += len; + + while (cur <= end && *cur == '/') + cur++; + path = cur; + } while (path < end); + + *p = '\0'; + + /* + * remove the last slash if there has and just to make sure that + * we will get something like "dir1/dir2" + */ + if (*(--p) == '/') + *p = '\0'; + + return buf; +} + static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) @@ -437,6 +503,7 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); + char *p1, *p2; int ret; ret = memcmp(fsopt1, fsopt2, ofs); @@ -449,9 +516,21 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); + + p1 = path_remove_extra_slash(fsopt1->server_path); + if (IS_ERR(p1)) + return PTR_ERR(p1); + p2 = path_remove_extra_slash(fsopt2->server_path); + if (IS_ERR(p2)) { + kfree(p1); + return PTR_ERR(p2); + } + ret = strcmp_null(p1, p2); + kfree(p1); + kfree(p2); if (ret) return ret; + ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq); if (ret) return ret; @@ -507,12 +586,14 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, */ dev_name_end = strchr(dev_name, '/'); if (dev_name_end) { - if (strlen(dev_name_end) > 1) { - fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); - if (!fsopt->server_path) { - err = -ENOMEM; - goto out; - } + /* + * The server_path will include the whole chars from userland + * including the 
leading '/'. + */ + fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL); + if (!fsopt->server_path) { + err = -ENOMEM; + goto out; } } else { dev_name_end = dev_name + strlen(dev_name); @@ -842,7 +923,6 @@ static void destroy_caches(void) ceph_fscache_unregister(); } - /* * ceph_umount_begin - initiate forced umount. Tear down down the * mount, skipping steps that may hang while waiting for server(s). @@ -929,9 +1009,6 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, return root; } - - - /* * mount: join the ceph cluster, and open root directory. */ @@ -945,7 +1022,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path; + const char *path, *p; err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -957,17 +1034,22 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; } - if (!fsc->mount_options->server_path) { - path = ""; - dout("mount opening path \\t\n"); - } else { - path = fsc->mount_options->server_path + 1; - dout("mount opening path %s\n", path); + p = path_remove_extra_slash(fsc->mount_options->server_path); + if (IS_ERR(p)) { + err = PTR_ERR(p); + goto out; } + /* if the server path is omitted or just consists of '/' */ + if (!p) + path = ""; + else + path = p; + dout("mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); + kfree(p); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; From 193490dbe5ba5a3c8aa0e61312d67142483c9498 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 10 Feb 2020 22:51:08 +0100 Subject: [PATCH 31/42] ceph: canonicalize server path in place commit b27a939e8376a3f1ed09b9c33ef44d20f18ec3d0 upstream. syzbot reported that 4fbc0c711b24 ("ceph: remove the extra slashes in the server path") had caused a regression where an allocation could be done under a spinlock -- compare_mount_options() is called by sget_fc() with sb_lock held. We don't really need the supplied server path, so canonicalize it in place and compare it directly. To make this work, the leading slash is kept around and the logic in ceph_real_mount() to skip it is restored. CEPH_MSG_CLIENT_SESSION now reports the same (i.e. canonicalized) path, with the leading slash of course. Fixes: 4fbc0c711b24 ("ceph: remove the extra slashes in the server path") Reported-by: syzbot+98704a51af8e3d9425a9@syzkaller.appspotmail.com Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Signed-off-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- fs/ceph/super.c | 118 +++++++++++------------------------------------- fs/ceph/super.h | 2 +- 2 files changed, 28 insertions(+), 92 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 54b50452ec1e..d40658d5e808 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -214,6 +214,26 @@ static match_table_t fsopt_tokens = { {-1, NULL} }; +/* + * Remove adjacent slashes and then the trailing slash, unless it is + * the only remaining character. + * + * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/". 
+ */ +static void canonicalize_path(char *path) +{ + int i, j = 0; + + for (i = 0; path[i] != '\0'; i++) { + if (path[i] != '/' || j < 1 || path[j - 1] != '/') + path[j++] = path[i]; + } + + if (j > 1 && path[j - 1] == '/') + j--; + path[j] = '\0'; +} + static int parse_fsopt_token(char *c, void *private) { struct ceph_mount_options *fsopt = private; @@ -429,73 +449,6 @@ static int strcmp_null(const char *s1, const char *s2) return strcmp(s1, s2); } -/** - * path_remove_extra_slash - Remove the extra slashes in the server path - * @server_path: the server path and could be NULL - * - * Return NULL if the path is NULL or only consists of "/", or a string - * without any extra slashes including the leading slash(es) and the - * slash(es) at the end of the server path, such as: - * "//dir1////dir2///" --> "dir1/dir2" - */ -static char *path_remove_extra_slash(const char *server_path) -{ - const char *path = server_path; - const char *cur, *end; - char *buf, *p; - int len; - - /* if the server path is omitted */ - if (!path) - return NULL; - - /* remove all the leading slashes */ - while (*path == '/') - path++; - - /* if the server path only consists of slashes */ - if (*path == '\0') - return NULL; - - len = strlen(path); - - buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) - return ERR_PTR(-ENOMEM); - - end = path + len; - p = buf; - do { - cur = strchr(path, '/'); - if (!cur) - cur = end; - - len = cur - path; - - /* including one '/' */ - if (cur != end) - len += 1; - - memcpy(p, path, len); - p += len; - - while (cur <= end && *cur == '/') - cur++; - path = cur; - } while (path < end); - - *p = '\0'; - - /* - * remove the last slash if there has and just to make sure that - * we will get something like "dir1/dir2" - */ - if (*(--p) == '/') - *p = '\0'; - - return buf; -} - static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_options *new_opt, struct ceph_fs_client *fsc) @@ -503,7 +456,6 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, struct ceph_mount_options *fsopt1 = new_fsopt; struct ceph_mount_options *fsopt2 = fsc->mount_options; int ofs = offsetof(struct ceph_mount_options, snapdir_name); - char *p1, *p2; int ret; ret = memcmp(fsopt1, fsopt2, ofs); @@ -513,21 +465,12 @@ static int compare_mount_options(struct ceph_mount_options *new_fsopt, ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); if (ret) return ret; + ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace); if (ret) return ret; - p1 = path_remove_extra_slash(fsopt1->server_path); - if (IS_ERR(p1)) - return PTR_ERR(p1); - p2 = path_remove_extra_slash(fsopt2->server_path); - if (IS_ERR(p2)) { - kfree(p1); - return PTR_ERR(p2); - } - ret = strcmp_null(p1, p2); - kfree(p1); - kfree(p2); + ret = strcmp_null(fsopt1->server_path, fsopt2->server_path); if (ret) return ret; @@ -595,6 +538,8 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, err = -ENOMEM; goto out; } + + canonicalize_path(fsopt->server_path); } else { dev_name_end = dev_name + strlen(dev_name); } @@ -1022,7 +967,9 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) mutex_lock(&fsc->client->mount_mutex); if (!fsc->sb->s_root) { - const char *path, *p; + const char *path = fsc->mount_options->server_path ? 
+ fsc->mount_options->server_path + 1 : ""; + err = __ceph_open_session(fsc->client, started); if (err < 0) goto out; @@ -1034,22 +981,11 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) goto out; } - p = path_remove_extra_slash(fsc->mount_options->server_path); - if (IS_ERR(p)) { - err = PTR_ERR(p); - goto out; - } - /* if the server path is omitted or just consists of '/' */ - if (!p) - path = ""; - else - path = p; dout("mount opening path '%s'\n", path); ceph_fs_debugfs_init(fsc); root = open_root_dentry(fsc, path, started); - kfree(p); if (IS_ERR(root)) { err = PTR_ERR(root); goto out; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f98d9247f9cb..bb12c9f3a218 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -92,7 +92,7 @@ struct ceph_mount_options { char *snapdir_name; /* default ".snap" */ char *mds_namespace; /* default NULL */ - char *server_path; /* default "/" */ + char *server_path; /* default NULL (means "/") */ char *fscache_uniq; /* default NULL */ }; From ab6ee433028847c3e0e6c0c47e7740a42c1f0c00 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 20 Feb 2020 20:04:03 -0800 Subject: [PATCH 32/42] include/uapi/linux/swab.h: fix userspace breakage, use __BITS_PER_LONG for swap commit 467d12f5c7842896d2de3ced74e4147ee29e97c8 upstream. QEMU has a funny new build error message when I use the upstream kernel headers: CC block/file-posix.o In file included from /home/cborntra/REPOS/qemu/include/qemu/timer.h:4, from /home/cborntra/REPOS/qemu/include/qemu/timed-average.h:29, from /home/cborntra/REPOS/qemu/include/block/accounting.h:28, from /home/cborntra/REPOS/qemu/include/block/block_int.h:27, from /home/cborntra/REPOS/qemu/block/file-posix.c:30: /usr/include/linux/swab.h: In function `__swab': /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:34: error: "sizeof" is not defined, evaluates to 0 [-Werror=undef] 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^~~~~~ /home/cborntra/REPOS/qemu/include/qemu/bitops.h:20:41: error: missing binary operator before token "(" 20 | #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) | ^ cc1: all warnings being treated as errors make: *** [/home/cborntra/REPOS/qemu/rules.mak:69: block/file-posix.o] Error 1 rm tests/qemu-iotests/socket_scm_helper.o This was triggered by commit d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h"). That patch is doing #include but it uses BITS_PER_LONG. The kernel file asm/bitsperlong.h provide only __BITS_PER_LONG. 
Let us use the __ variant in swap.h Link: http://lkml.kernel.org/r/20200213142147.17604-1-borntraeger@de.ibm.com Fixes: d5767057c9a ("uapi: rename ext2_swab() to swab() and share globally in swab.h") Signed-off-by: Christian Borntraeger Cc: Yury Norov Cc: Allison Randal Cc: Joe Perches Cc: Thomas Gleixner Cc: William Breathitt Gray Cc: Torsten Hilbrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/swab.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index fa7f97da5b76..7272f85d6d6a 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -135,9 +135,9 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) static __always_inline unsigned long __swab(const unsigned long y) { -#if BITS_PER_LONG == 64 +#if __BITS_PER_LONG == 64 return __swab64(y); -#else /* BITS_PER_LONG == 32 */ +#else /* __BITS_PER_LONG == 32 */ return __swab32(y); #endif } From 51795bcf595d9cfbae6e17cdfd02c25db9b3a62c Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 18 Feb 2020 15:45:38 -0400 Subject: [PATCH 33/42] RDMA/ucma: Put a lock around every call to the rdma_cm layer commit 7c11910783a1ea17e88777552ef146cace607b3c upstream. The rdma_cm must be used single threaded. This appears to be a bug in the design, as it does have lots of locking that seems like it should allow concurrency. However, when it is all said and done every single place that uses the cma_exch() scheme is broken, and all the unlocked reads from the ucma of the cm_id data are wrong too. syzkaller has been finding endless bugs related to this. Fixing this in any elegant way is some enormous amount of work. Take a very big hammer and put a mutex around everything to do with the ucma_context at the top of every syscall. 
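The shape of the change, repeated across every handler in the diff, is simply to bracket each rdma_cm call with the new per-context mutex. Sketch with simplified arguments; any rdma_* call on ctx->cm_id gets the same treatment:

        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        mutex_lock(&ctx->mutex);
        ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
        mutex_unlock(&ctx->mutex);

        ucma_put_ctx(ctx);
        return ret;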
Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace") Link: https://lore.kernel.org/r/20200218210432.GA31966@ziepe.ca Reported-by: syzbot+adb15cf8c2798e4e0db4@syzkaller.appspotmail.com Reported-by: syzbot+e5579222b6a3edd96522@syzkaller.appspotmail.com Reported-by: syzbot+4b628fcc748474003457@syzkaller.appspotmail.com Reported-by: syzbot+29ee8f76017ce6cf03da@syzkaller.appspotmail.com Reported-by: syzbot+6956235342b7317ec564@syzkaller.appspotmail.com Reported-by: syzbot+b358909d8d01556b790b@syzkaller.appspotmail.com Reported-by: syzbot+6b46b135602a3f3ac99e@syzkaller.appspotmail.com Reported-by: syzbot+8458d13b13562abf6b77@syzkaller.appspotmail.com Reported-by: syzbot+bd034f3fdc0402e942ed@syzkaller.appspotmail.com Reported-by: syzbot+c92378b32760a4eef756@syzkaller.appspotmail.com Reported-by: syzbot+68b44a1597636e0b342c@syzkaller.appspotmail.com Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 49 ++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 0274e9b704be..f4f79f1292b9 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -91,6 +91,7 @@ struct ucma_context { struct ucma_file *file; struct rdma_cm_id *cm_id; + struct mutex mutex; u64 uid; struct list_head list; @@ -216,6 +217,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) init_completion(&ctx->comp); INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; + mutex_init(&ctx->mutex); if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) goto error; @@ -589,6 +591,7 @@ static int ucma_free_ctx(struct ucma_context *ctx) } events_reported = ctx->events_reported; + mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; } @@ -658,7 +661,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); + ucma_put_ctx(ctx); return ret; } @@ -681,7 +687,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -705,8 +713,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -731,8 +741,10 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -752,7 +764,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -841,6 +855,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); memset(&resp, 0, sizeof resp); addr = (struct sockaddr *) 
&ctx->cm_id->route.addr.src_addr; memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? @@ -864,6 +879,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, ucma_copy_iw_route(&resp, &ctx->cm_id->route); out: + mutex_unlock(&ctx->mutex); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1014,6 +1030,7 @@ static ssize_t ucma_query(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); switch (cmd.option) { case RDMA_USER_CM_QUERY_ADDR: ret = ucma_query_addr(ctx, response, out_len); @@ -1028,6 +1045,7 @@ static ssize_t ucma_query(struct ucma_file *file, ret = -ENOSYS; break; } + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1068,7 +1086,9 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); + mutex_lock(&ctx->mutex); ret = rdma_connect(ctx->cm_id, &conn_param); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1089,7 +1109,9 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? cmd.backlog : max_backlog; + mutex_lock(&ctx->mutex); ret = rdma_listen(ctx->cm_id, ctx->backlog); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1112,13 +1134,17 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); + mutex_unlock(&ctx->mutex); if (!ret) ctx->uid = cmd.uid; mutex_unlock(&file->mut); - } else + } else { + mutex_lock(&ctx->mutex); ret = __rdma_accept(ctx->cm_id, NULL, NULL); - + mutex_unlock(&ctx->mutex); + } ucma_put_ctx(ctx); return ret; } @@ -1137,7 +1163,9 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1156,7 +1184,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); ret = rdma_disconnect(ctx->cm_id); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; } @@ -1187,7 +1217,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file, resp.qp_attr_mask = 0; memset(&qp_attr, 0, sizeof qp_attr); qp_attr.qp_state = cmd.qp_state; + mutex_lock(&ctx->mutex); ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); + mutex_unlock(&ctx->mutex); if (ret) goto out; @@ -1273,9 +1305,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx, struct sa_path_rec opa; sa_convert_path_ib_to_opa(&opa, &sa_path); + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &opa); + mutex_unlock(&ctx->mutex); } else { + mutex_lock(&ctx->mutex); ret = rdma_set_ib_path(ctx->cm_id, &sa_path); + mutex_unlock(&ctx->mutex); } if (ret) return ret; @@ -1308,7 +1344,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, switch (level) { case RDMA_OPTION_ID: + mutex_lock(&ctx->mutex); ret = ucma_set_option_id(ctx, optname, optval, optlen); + mutex_unlock(&ctx->mutex); break; case RDMA_OPTION_IB: ret = ucma_set_option_ib(ctx, optname, optval, optlen); @@ -1368,8 +1406,10 @@ static ssize_t ucma_notify(struct ucma_file 
*file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); + mutex_lock(&ctx->mutex); if (ctx->cm_id->device) ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); + mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1412,8 +1452,10 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); + mutex_unlock(&ctx->mutex); if (ret) goto err2; @@ -1513,7 +1555,10 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, goto out; } + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_unlock(&mc->ctx->mutex); + mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); list_del(&mc->list); From 09583e3f0402607cfc8b6de6ebd118778b05c929 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 27 Feb 2020 16:36:51 -0400 Subject: [PATCH 34/42] RDMA/cma: Teach lockdep about the order of rtnl and lock commit 32ac9e4399b12d3e54d312a0e0e30ed5cd19bd4e upstream. This lock ordering only happens when bonding is enabled and a certain bonding related event fires. However, since it can happen this is a global restriction on lock ordering. Teach lockdep about the order directly and unconditionally so bugs here are found quickly. See https://syzkaller.appspot.com/bug?extid=55de90ab5f44172b0c90 Link: https://lore.kernel.org/r/20200227203651.GA27185@ziepe.ca Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6c12da176981..7c34b716aff9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4719,6 +4719,19 @@ static int __init cma_init(void) { int ret; + /* + * There is a rare lock ordering dependency in cma_netdev_callback() + * that only happens when bonding is enabled. Teach lockdep that rtnl + * must never be nested under lock so it can find these without having + * to test with bonding. + */ + if (IS_ENABLED(CONFIG_LOCKDEP)) { + rtnl_lock(); + mutex_lock(&lock); + mutex_unlock(&lock); + rtnl_unlock(); + } + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; From 7f5432c2f446454d1b9376327227aed7c97287f0 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Fri, 28 Feb 2020 18:35:34 +0100 Subject: [PATCH 35/42] RDMA/siw: Fix passive connection establishment commit 33fb27fd54465c74cbffba6315b2f043e90cec4c upstream. Holding the rtnl_lock while iterating a devices interface address list potentially causes deadlocks with the cma_netdev_callback. While this was implemented to limit the scope of a wildcard listen to addresses of the current device only, a better solution limits the scope of the socket to the device. This completely avoiding locking, and also results in significant code simplification. 
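The replacement drops the per-address iteration entirely and scopes a single wildcard listen to the port's netdev by setting sk->sk_bound_dev_if, which is the in-kernel equivalent of a user-space SO_BINDTODEVICE bind. Illustrative user-space analogue only; the device name and port are placeholders and error handling is omitted:

        #include <arpa/inet.h>
        #include <netinet/in.h>
        #include <string.h>
        #include <sys/socket.h>

        int main(void)
        {
                int fd = socket(AF_INET, SOCK_STREAM, 0);
                struct sockaddr_in a = {
                        .sin_family = AF_INET,
                        .sin_addr.s_addr = htonl(INADDR_ANY),
                        .sin_port = htons(7777),
                };

                /* limit the wildcard listen to one interface */
                setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0"));
                bind(fd, (struct sockaddr *)&a, sizeof(a));
                listen(fd, 128);
                return 0;
        }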
Fixes: c421651fa229 ("RDMA/siw: Add missing rtnl_lock around access to ifa") Link: https://lore.kernel.org/r/20200228173534.26815-1-bmt@zurich.ibm.com Reported-by: syzbot+55de90ab5f44172b0c90@syzkaller.appspotmail.com Suggested-by: Jason Gunthorpe Signed-off-by: Bernard Metzler Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/sw/siw/siw_cm.c | 145 ++++++----------------------- 1 file changed, 31 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 31aa41d85ccf..e3bac1a877bb 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1783,14 +1783,23 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) return 0; } -static int siw_listen_address(struct iw_cm_id *id, int backlog, - struct sockaddr *laddr, int addr_family) +/* + * siw_create_listen - Create resources for a listener's IWCM ID @id + * + * Starts listen on the socket address id->local_addr. + * + */ +int siw_create_listen(struct iw_cm_id *id, int backlog) { struct socket *s; struct siw_cep *cep = NULL; struct siw_device *sdev = to_siw_dev(id->device); + int addr_family = id->local_addr.ss_family; int rv = 0, s_val; + if (addr_family != AF_INET && addr_family != AF_INET6) + return -EAFNOSUPPORT; + rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s); if (rv < 0) return rv; @@ -1805,9 +1814,25 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, siw_dbg(id->device, "setsockopt error: %d\n", rv); goto error; } - rv = s->ops->bind(s, laddr, addr_family == AF_INET ? - sizeof(struct sockaddr_in) : - sizeof(struct sockaddr_in6)); + if (addr_family == AF_INET) { + struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr); + + /* For wildcard addr, limit binding to current device only */ + if (ipv4_is_zeronet(laddr->sin_addr.s_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in)); + } else { + struct sockaddr_in6 *laddr = &to_sockaddr_in6(id->local_addr); + + /* For wildcard addr, limit binding to current device only */ + if (ipv6_addr_any(&laddr->sin6_addr)) + s->sk->sk_bound_dev_if = sdev->netdev->ifindex; + + rv = s->ops->bind(s, (struct sockaddr *)laddr, + sizeof(struct sockaddr_in6)); + } if (rv) { siw_dbg(id->device, "socket bind error: %d\n", rv); goto error; @@ -1866,7 +1891,7 @@ static int siw_listen_address(struct iw_cm_id *id, int backlog, list_add_tail(&cep->listenq, (struct list_head *)id->provider_data); cep->state = SIW_EPSTATE_LISTENING; - siw_dbg(id->device, "Listen at laddr %pISp\n", laddr); + siw_dbg(id->device, "Listen at laddr %pISp\n", &id->local_addr); return 0; @@ -1924,114 +1949,6 @@ static void siw_drop_listeners(struct iw_cm_id *id) } } -/* - * siw_create_listen - Create resources for a listener's IWCM ID @id - * - * Listens on the socket addresses id->local_addr and id->remote_addr. - * - * If the listener's @id provides a specific local IP address, at most one - * listening socket is created and associated with @id. - * - * If the listener's @id provides the wildcard (zero) local IP address, - * a separate listen is performed for each local IP address of the device - * by creating a listening socket and binding to that local IP address. 
- * - */ -int siw_create_listen(struct iw_cm_id *id, int backlog) -{ - struct net_device *dev = to_siw_dev(id->device)->netdev; - int rv = 0, listeners = 0; - - siw_dbg(id->device, "backlog %d\n", backlog); - - /* - * For each attached address of the interface, create a - * listening socket, if id->local_addr is the wildcard - * IP address or matches the IP address. - */ - if (id->local_addr.ss_family == AF_INET) { - struct in_device *in_dev = in_dev_get(dev); - struct sockaddr_in s_laddr, *s_raddr; - const struct in_ifaddr *ifa; - - if (!in_dev) { - rv = -ENODEV; - goto out; - } - memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr)); - s_raddr = (struct sockaddr_in *)&id->remote_addr; - - siw_dbg(id->device, - "laddr %pI4:%d, raddr %pI4:%d\n", - &s_laddr.sin_addr, ntohs(s_laddr.sin_port), - &s_raddr->sin_addr, ntohs(s_raddr->sin_port)); - - rtnl_lock(); - in_dev_for_each_ifa_rtnl(ifa, in_dev) { - if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) || - s_laddr.sin_addr.s_addr == ifa->ifa_address) { - s_laddr.sin_addr.s_addr = ifa->ifa_address; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&s_laddr, - AF_INET); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in_dev_put(in_dev); - } else if (id->local_addr.ss_family == AF_INET6) { - struct inet6_dev *in6_dev = in6_dev_get(dev); - struct inet6_ifaddr *ifp; - struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr), - *s_raddr = &to_sockaddr_in6(id->remote_addr); - - if (!in6_dev) { - rv = -ENODEV; - goto out; - } - siw_dbg(id->device, - "laddr %pI6:%d, raddr %pI6:%d\n", - &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port), - &s_raddr->sin6_addr, ntohs(s_raddr->sin6_port)); - - rtnl_lock(); - list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { - if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED)) - continue; - if (ipv6_addr_any(&s_laddr->sin6_addr) || - ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) { - struct sockaddr_in6 bind_addr = { - .sin6_family = AF_INET6, - .sin6_port = s_laddr->sin6_port, - .sin6_flowinfo = 0, - .sin6_addr = ifp->addr, - .sin6_scope_id = dev->ifindex }; - - rv = siw_listen_address(id, backlog, - (struct sockaddr *)&bind_addr, - AF_INET6); - if (!rv) - listeners++; - } - } - rtnl_unlock(); - in6_dev_put(in6_dev); - } else { - rv = -EAFNOSUPPORT; - } -out: - if (listeners) - rv = 0; - else if (!rv) - rv = -EINVAL; - - siw_dbg(id->device, "%s\n", rv ? "FAIL" : "OK"); - - return rv; -} - int siw_destroy_listen(struct iw_cm_id *id) { if (!id->provider_data) { From b40f1ae359f20cb10ff262f2deda59bec3457eaf Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 8 Mar 2020 17:45:27 +0800 Subject: [PATCH 36/42] Bluetooth: RFCOMM: fix ODEBUG bug in rfcomm_dev_ioctl commit 71811cac8532b2387b3414f7cd8fe9e497482864 upstream. Needn't call 'rfcomm_dlc_put' here, because 'rfcomm_dlc_exists' didn't increase dlc->refcnt. 
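The rule the fix restores: only a lookup that takes a reference obliges the caller to drop one, and rfcomm_dlc_exists() merely reports existence. Generic sketch with hypothetical helpers (lookup_peek/lookup_get/use/put are not real RFCOMM APIs):

        /* lookup_peek() returns a pointer WITHOUT taking a reference */
        obj = lookup_peek(table, key);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
        if (obj)
                return -EBUSY;          /* nothing to put on this path */

        /* lookup_get() takes a reference, so it must be balanced */
        obj = lookup_get(table, key);
        if (obj) {
                use(obj);
                put(obj);
        }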
Reported-by: syzbot+4496e82090657320efc6@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Suggested-by: Hillf Danton Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/rfcomm/tty.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 0c7d31c6c18c..a58584949a95 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -413,10 +413,8 @@ static int __rfcomm_create_dev(struct sock *sk, void __user *arg) dlc = rfcomm_dlc_exists(&req.src, &req.dst, req.channel); if (IS_ERR(dlc)) return PTR_ERR(dlc); - else if (dlc) { - rfcomm_dlc_put(dlc); + if (dlc) return -EBUSY; - } dlc = rfcomm_dlc_alloc(GFP_KERNEL); if (!dlc) return -ENOMEM; From d020ff5060a40918442dc7898090add16452afc9 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Wed, 18 Mar 2020 12:17:41 +0200 Subject: [PATCH 37/42] RDMA/cm: Update num_paths in cma_resolve_iboe_route error flow commit 987914ab841e2ec281a35b54348ab109b4c0bb4e upstream. After a successful allocation of path_rec, num_paths is set to 1, but any error after such allocation will leave num_paths uncleared. This causes to de-referencing a NULL pointer later on. Hence, num_paths needs to be set back to 0 if such an error occurs. The following crash from syzkaller revealed it. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI CPU: 0 PID: 357 Comm: syz-executor060 Not tainted 4.18.0+ #311 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:ib_copy_path_rec_to_user+0x94/0x3e0 Code: f1 f1 f1 f1 c7 40 0c 00 00 f4 f4 65 48 8b 04 25 28 00 00 00 48 89 45 c8 31 c0 e8 d7 60 24 ff 48 8d 7b 4c 48 89 f8 48 c1 e8 03 <42> 0f b6 14 30 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 RSP: 0018:ffff88006586f980 EFLAGS: 00010207 RAX: 0000000000000009 RBX: 0000000000000000 RCX: 1ffff1000d5fe475 RDX: ffff8800621e17c0 RSI: ffffffff820d45f9 RDI: 000000000000004c RBP: ffff88006586fa50 R08: ffffed000cb0df73 R09: ffffed000cb0df72 R10: ffff88006586fa70 R11: ffffed000cb0df73 R12: 1ffff1000cb0df30 R13: ffff88006586fae8 R14: dffffc0000000000 R15: ffff88006aff2200 FS: 00000000016fc880(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000040 CR3: 0000000063fec000 CR4: 00000000000006b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ib_copy_path_rec_from_user+0xcc0/0xcc0 ? __mutex_unlock_slowpath+0xfc/0x670 ? wait_for_completion+0x3b0/0x3b0 ? ucma_query_route+0x818/0xc60 ucma_query_route+0x818/0xc60 ? ucma_listen+0x1b0/0x1b0 ? sched_clock_cpu+0x18/0x1d0 ? sched_clock_cpu+0x18/0x1d0 ? ucma_listen+0x1b0/0x1b0 ? ucma_write+0x292/0x460 ucma_write+0x292/0x460 ? ucma_close_id+0x60/0x60 ? sched_clock_cpu+0x18/0x1d0 ? sched_clock_cpu+0x18/0x1d0 __vfs_write+0xf7/0x620 ? ucma_close_id+0x60/0x60 ? kernel_read+0x110/0x110 ? time_hardirqs_on+0x19/0x580 ? lock_acquire+0x18b/0x3a0 ? finish_task_switch+0xf3/0x5d0 ? _raw_spin_unlock_irq+0x29/0x40 ? _raw_spin_unlock_irq+0x29/0x40 ? finish_task_switch+0x1be/0x5d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? security_file_permission+0x172/0x1e0 vfs_write+0x192/0x460 ksys_write+0xc6/0x1a0 ? __ia32_sys_read+0xb0/0xb0 ? entry_SYSCALL_64_after_hwframe+0x3e/0xbe ? 
do_syscall_64+0x1d/0x470 do_syscall_64+0x9e/0x470 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 3c86aa70bf67 ("RDMA/cm: Add RDMA CM support for IBoE devices") Link: https://lore.kernel.org/r/20200318101741.47211-1-leon@kernel.org Signed-off-by: Avihai Horon Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7c34b716aff9..8f776b7de45e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2911,6 +2911,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; + route->num_paths = 0; err1: kfree(work); return ret; From d4083258db0463a733fa8da978e314c1c4b5d56c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 9 Mar 2020 21:26:17 -0700 Subject: [PATCH 38/42] blk-mq: Keep set->nr_hw_queues and set->map[].nr_queues in sync commit 6e66b49392419f3fe134e1be583323ef75da1e4b upstream. blk_mq_map_queues() and multiple .map_queues() implementations expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the number of hardware queues. Hence set .nr_queues before calling these functions. This patch fixes the following kernel warning: WARNING: CPU: 0 PID: 2501 at include/linux/cpumask.h:137 Call Trace: blk_mq_run_hw_queue+0x19d/0x350 block/blk-mq.c:1508 blk_mq_run_hw_queues+0x112/0x1a0 block/blk-mq.c:1525 blk_mq_requeue_work+0x502/0x780 block/blk-mq.c:775 process_one_work+0x9af/0x1740 kernel/workqueue.c:2269 worker_thread+0x98/0xe40 kernel/workqueue.c:2415 kthread+0x361/0x430 kernel/kthread.c:255 Fixes: ed76e329d74a ("blk-mq: abstract out queue map") # v5.0 Reported-by: syzbot+d44e1b26ce5c3e77458d@syzkaller.appspotmail.com Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Cc: Johannes Thumshirn Cc: Hannes Reinecke Cc: Ming Lei Cc: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-mq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3c1abab1fdf5..a8c1a45cedde 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3007,6 +3007,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) { + /* + * blk_mq_map_queues() and multiple .map_queues() implementations + * expect that set->map[HCTX_TYPE_DEFAULT].nr_queues is set to the + * number of hardware queues. + */ + if (set->nr_maps == 1) + set->map[HCTX_TYPE_DEFAULT].nr_queues = set->nr_hw_queues; + if (set->ops->map_queues && !is_kdump_kernel()) { int i; From 047affa0ef0026b25e90adad7e2f8f99e11da654 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Sun, 29 Mar 2020 16:56:47 +0800 Subject: [PATCH 39/42] fbcon: fix null-ptr-deref in fbcon_switch commit b139f8b00db4a8ea75a4174346eafa48041aa489 upstream. Set logo_shown to FBCON_LOGO_CANSHOW when the vc was deallocated. 
syzkaller report: https://lkml.org/lkml/2020/3/27/403 general protection fault, probably for non-canonical address 0xdffffc000000006c: 0000 [#1] SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000360-0x0000000000000367] RIP: 0010:fbcon_switch+0x28f/0x1740 drivers/video/fbdev/core/fbcon.c:2260 Call Trace: redraw_screen+0x2a8/0x770 drivers/tty/vt/vt.c:1008 vc_do_resize+0xfe7/0x1360 drivers/tty/vt/vt.c:1295 fbcon_init+0x1221/0x1ab0 drivers/video/fbdev/core/fbcon.c:1219 visual_init+0x305/0x5c0 drivers/tty/vt/vt.c:1062 do_bind_con_driver+0x536/0x890 drivers/tty/vt/vt.c:3542 do_take_over_console+0x453/0x5b0 drivers/tty/vt/vt.c:4122 do_fbcon_takeover+0x10b/0x210 drivers/video/fbdev/core/fbcon.c:588 fbcon_fb_registered+0x26b/0x340 drivers/video/fbdev/core/fbcon.c:3259 do_register_framebuffer drivers/video/fbdev/core/fbmem.c:1664 [inline] register_framebuffer+0x56e/0x980 drivers/video/fbdev/core/fbmem.c:1832 dlfb_usb_probe.cold+0x1743/0x1ba3 drivers/video/fbdev/udlfb.c:1735 usb_probe_interface+0x310/0x800 drivers/usb/core/driver.c:374 accessing vc_cons[logo_shown].d->vc_top causes the bug. Reported-by: syzbot+732528bae351682f1f27@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Acked-by: Sam Ravnborg Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200329085647.25133-1-hqjagain@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/core/fbcon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index c9235a2f42f8..22070cfea1d0 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1276,6 +1276,9 @@ static void fbcon_deinit(struct vc_data *vc) if (!con_is_bound(&fb_con)) fbcon_exit(); + if (vc->vc_num == logo_shown) + logo_shown = FBCON_LOGO_CANSHOW; + return; } From 04ad505eed58f442b8d55004df10456ab2f0ec08 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Tue, 7 Apr 2020 13:32:22 -0700 Subject: [PATCH 40/42] drm/i915: Fix ref->mutex deadlock in i915_active_wait() The following deadlock exists in i915_active_wait() due to a double lock on ref->mutex (call chain listed in order from top to bottom): i915_active_wait(); mutex_lock_interruptible(&ref->mutex); <-- ref->mutex first acquired i915_active_request_retire(); node_retire(); active_retire(); mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); <-- DEADLOCK Fix the deadlock by skipping the second ref->mutex lock when active_retire() is called through i915_active_request_retire(). Note that this bug only affects 5.4 and has since been fixed in 5.5. Normally, a backport of the fix from 5.5 would be in order, but the patch set that fixes this deadlock involves massive changes that are neither feasible nor desirable for backporting [1][2][3]. Therefore, this small patch was made to address the deadlock specifically for 5.4. 
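The deadlock is a straight recursive acquisition of ref->mutex, and the 5.4-only workaround threads a flag down the retire path so the caller that already holds the lock skips the second acquisition. Stripped-down sketch of the pattern with a hypothetical struct and helpers, not the i915 code:

        static void retire(struct obj *o, bool lock)
        {
                if (lock)
                        mutex_lock(&o->mutex);
                /* ... release idle nodes ... */
                if (lock)
                        mutex_unlock(&o->mutex);
        }

        static int wait_on(struct obj *o)
        {
                mutex_lock(&o->mutex);  /* outer acquisition */
                retire(o, false);       /* must not take o->mutex again */
                mutex_unlock(&o->mutex);
                return 0;
        }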
From 04ad505eed58f442b8d55004df10456ab2f0ec08 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf
Date: Tue, 7 Apr 2020 13:32:22 -0700
Subject: [PATCH 40/42] drm/i915: Fix ref->mutex deadlock in i915_active_wait()

The following deadlock exists in i915_active_wait() due to a double lock
on ref->mutex (call chain listed in order from top to bottom):

  i915_active_wait();
    mutex_lock_interruptible(&ref->mutex); <-- ref->mutex first acquired
    i915_active_request_retire();
      node_retire();
        active_retire();
          mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); <-- DEADLOCK

Fix the deadlock by skipping the second ref->mutex lock when
active_retire() is called through i915_active_request_retire().

Note that this bug only affects 5.4 and has since been fixed in 5.5.
Normally, a backport of the fix from 5.5 would be in order, but the patch
set that fixes this deadlock involves massive changes that are neither
feasible nor desirable for backporting [1][2][3]. Therefore, this small
patch was made to address the deadlock specifically for 5.4.

[1] 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker")
[2] 093b92287363 ("drm/i915: Split i915_active.mutex into an irq-safe spinlock for the rbtree")
[3] 750bde2fd4ff ("drm/i915: Serialise with remote retirement")

Fixes: 12c255b5dad1 ("drm/i915: Provide an i915_active.acquire callback")
Cc: # 5.4.x
Signed-off-by: Sultan Alsawaf
Signed-off-by: Greg Kroah-Hartman
---
 drivers/gpu/drm/i915/i915_active.c | 29 +++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_active.h |  4 ++--
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 48e16ad93bbd..51dc8753b527 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -121,7 +121,7 @@ static inline void debug_active_assert(struct i915_active *ref) { }
 #endif
 
 static void
-__active_retire(struct i915_active *ref)
+__active_retire(struct i915_active *ref, bool lock)
 {
 	struct active_node *it, *n;
 	struct rb_root root;
@@ -138,7 +138,8 @@ __active_retire(struct i915_active *ref)
 		retire = true;
 	}
 
-	mutex_unlock(&ref->mutex);
+	if (likely(lock))
+		mutex_unlock(&ref->mutex);
 	if (!retire)
 		return;
 
@@ -153,21 +154,28 @@ __active_retire(struct i915_active *ref)
 }
 
 static void
-active_retire(struct i915_active *ref)
+active_retire(struct i915_active *ref, bool lock)
 {
 	GEM_BUG_ON(!atomic_read(&ref->count));
 	if (atomic_add_unless(&ref->count, -1, 1))
 		return;
 
 	/* One active may be flushed from inside the acquire of another */
-	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
-	__active_retire(ref);
+	if (likely(lock))
+		mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
+	__active_retire(ref, lock);
 }
 
 static void
 node_retire(struct i915_active_request *base, struct i915_request *rq)
 {
-	active_retire(node_from_active(base)->ref);
+	active_retire(node_from_active(base)->ref, true);
+}
+
+static void
+node_retire_nolock(struct i915_active_request *base, struct i915_request *rq)
+{
+	active_retire(node_from_active(base)->ref, false);
 }
 
 static struct i915_active_request *
@@ -364,7 +372,7 @@ int i915_active_acquire(struct i915_active *ref)
 void i915_active_release(struct i915_active *ref)
 {
 	debug_active_assert(ref);
-	active_retire(ref);
+	active_retire(ref, true);
 }
 
 static void __active_ungrab(struct i915_active *ref)
@@ -391,7 +399,7 @@ void i915_active_ungrab(struct i915_active *ref)
 {
 	GEM_BUG_ON(!test_bit(I915_ACTIVE_GRAB_BIT, &ref->flags));
 
-	active_retire(ref);
+	active_retire(ref, true);
 	__active_ungrab(ref);
 }
 
@@ -421,12 +429,13 @@ int i915_active_wait(struct i915_active *ref)
 			break;
 		}
 
-		err = i915_active_request_retire(&it->base, BKL(ref));
+		err = i915_active_request_retire(&it->base, BKL(ref),
+						 node_retire_nolock);
 		if (err)
 			break;
 	}
 
-	__active_retire(ref);
+	__active_retire(ref, true);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index f95058f99057..0ad7ef60d15f 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -309,7 +309,7 @@ i915_active_request_isset(const struct i915_active_request *active)
  */
 static inline int __must_check
 i915_active_request_retire(struct i915_active_request *active,
-			   struct mutex *mutex)
+			   struct mutex *mutex, i915_active_retire_fn retire)
 {
 	struct i915_request *request;
 	long ret;
@@ -327,7 +327,7 @@ i915_active_request_retire(struct i915_active_request *active,
 	list_del_init(&active->link);
 	RCU_INIT_POINTER(active->request, NULL);
 
-	active->retire(active, request);
+	retire(active, request);
 
 	return 0;
 }
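The i915 patch above threads a "lock" flag through the retire path so that a caller which already holds ref->mutex can skip re-acquiring it. A minimal pthread analogy of that pattern follows; the toy_* names are invented and this is not the i915 code. Compile with -pthread.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy analogue of ref->mutex; a plain, non-recursive mutex. */
static pthread_mutex_t ref_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Inner helper that normally takes the lock itself (like active_retire()). */
static void toy_retire(bool lock)
{
	if (lock)
		pthread_mutex_lock(&ref_mutex);
	printf("retiring\n");
	if (lock)
		pthread_mutex_unlock(&ref_mutex);
}

/* Outer path that already holds the lock (like i915_active_wait()). */
static void toy_wait(void)
{
	pthread_mutex_lock(&ref_mutex);
	/*
	 * Calling toy_retire(true) here would relock a non-recursive mutex
	 * and hang; passing false mirrors the node_retire_nolock() variant
	 * the patch introduces for the i915_active_wait() call chain.
	 */
	toy_retire(false);
	pthread_mutex_unlock(&ref_mutex);
}

int main(void)
{
	toy_wait();
	return 0;
}

The unlocked variant is only safe because the outer caller is known to hold the same mutex for the whole critical section, which is exactly the invariant the backport relies on.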
From ad5676629a126afeb8f3aa9d08eaad663cf0e8e2 Mon Sep 17 00:00:00 2001
From: Lu Baolu
Date: Wed, 15 Jan 2020 11:03:58 +0800
Subject: [PATCH 41/42] iommu/vt-d: Allow devices with RMRRs to use identity domain

commit 9235cb13d7d17baba0b3a9277381258361e95c16 upstream.

Since commit ea2447f700cab ("intel-iommu: Prevent devices with RMRRs
from being placed into SI Domain"), the Intel IOMMU driver doesn't
allow any devices with RMRR locked to use the identity domain. This
was added to fix the issue where the RMRR info for devices being
placed in and out of the identity domain gets lost.

This identity maps all RMRRs when setting up the identity domain, so
that devices with RMRRs could also use it.

Signed-off-by: Lu Baolu
Signed-off-by: Joerg Roedel
Cc: Alan Stern
Cc: John Donnelly
Signed-off-by: Greg Kroah-Hartman
---
 drivers/iommu/intel-iommu.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9d47b227e557..0d922eeae357 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2762,10 +2762,8 @@ static int __init si_domain_init(int hw)
 	}
 
 	/*
-	 * Normally we use DMA domains for devices which have RMRRs. But we
-	 * loose this requirement for graphic and usb devices. Identity map
-	 * the RMRRs for graphic and USB devices so that they could use the
-	 * si_domain.
+	 * Identity map the RMRRs so that devices with RMRRs could also use
+	 * the si_domain.
 	 */
 	for_each_rmrr_units(rmrr) {
 		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
@@ -2773,9 +2771,6 @@ static int __init si_domain_init(int hw)
 			unsigned long long start = rmrr->base_address;
 			unsigned long long end = rmrr->end_address;
 
-			if (device_is_rmrr_locked(dev))
-				continue;
-
 			if (WARN_ON(end < start ||
 				    end >> agaw_to_width(si_domain->agaw)))
 				continue;
@@ -2914,9 +2909,6 @@ static int device_def_domain_type(struct device *dev)
 	if (dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(dev);
 
-		if (device_is_rmrr_locked(dev))
-			return IOMMU_DOMAIN_DMA;
-
 		/*
 		 * Prevent any device marked as untrusted from getting
 		 * placed into the statically identity mapping domain.
@@ -2954,9 +2946,6 @@ static int device_def_domain_type(struct device *dev)
 			return IOMMU_DOMAIN_DMA;
 		} else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
 			return IOMMU_DOMAIN_DMA;
-	} else {
-		if (device_has_rmrr(dev))
-			return IOMMU_DOMAIN_DMA;
 	}
 
 	return (iommu_identity_mapping & IDENTMAP_ALL) ?

From bc844d58f697dff3ded4b410094ee89f5cedc04c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Mon, 13 Apr 2020 10:48:18 +0200
Subject: [PATCH 42/42] Linux 5.4.32

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index af28533919cc..c2d5975844d9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
 PATCHLEVEL = 4
-SUBLEVEL = 31
+SUBLEVEL = 32
 EXTRAVERSION =
 NAME = Kleptomaniac Octopus
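As a footnote to the iommu/vt-d patch (41/42) above: identity mapping an RMRR simply means programming the reserved range into the si_domain so that the I/O virtual address equals the physical address. The following is a simplified sketch of that idea with invented toy_* names and made-up example ranges; it is not the intel-iommu implementation.

#include <stdio.h>

/* Toy model of a reserved memory region (RMRR); invented type, not the driver's. */
struct toy_rmrr {
	unsigned long long base_address;
	unsigned long long end_address;
};

/* In an identity (1:1) domain, each IOVA in the range maps to the same physical address. */
static int toy_identity_map(unsigned long long start, unsigned long long end)
{
	if (end < start)
		return -1;	/* reject malformed ranges, as the real code warns on them */
	printf("identity map 0x%llx-0x%llx (iova == phys)\n", start, end);
	return 0;
}

int main(void)
{
	struct toy_rmrr rmrrs[] = {
		{ 0x7a000000ULL, 0x7a1fffffULL },	/* made-up USB controller RMRR */
		{ 0xad000000ULL, 0xaf7fffffULL },	/* made-up graphics RMRR */
	};

	/* Mapping every reserved range 1:1 is what lets RMRR devices share the identity domain. */
	for (unsigned int i = 0; i < sizeof(rmrrs) / sizeof(rmrrs[0]); i++)
		toy_identity_map(rmrrs[i].base_address, rmrrs[i].end_address);
	return 0;
}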