From 0414c78f14861cb704d6e6888efd53dd36e3bdde Mon Sep 17 00:00:00 2001 From: Anatole Denis Date: Wed, 4 Oct 2017 01:17:14 +0100 Subject: [PATCH 01/10] netfilter: nft_set_hash: disable fast_ops for 2-len keys jhash_1word of a u16 is a different value from jhash of the same u16 with length 2. Since elements are always inserted in sets using jhash over the actual klen, this would lead to incorrect lookups on fixed-size sets with a key length of 2, as they would be inserted with hash value jhash(key, 2) and looked up with hash value jhash_1word(key), which is different. Example reproducer(v4.13+), using anonymous sets which always have a fixed size: table inet t { chain c { type filter hook output priority 0; policy accept; tcp dport { 10001, 10003, 10005, 10007, 10009 } counter packets 4 bytes 240 reject tcp dport 10001 counter packets 4 bytes 240 reject tcp dport 10003 counter packets 4 bytes 240 reject tcp dport 10005 counter packets 4 bytes 240 reject tcp dport 10007 counter packets 0 bytes 0 reject tcp dport 10009 counter packets 4 bytes 240 reject } } then use nc -z localhost to probe; incorrectly hashed ports will pass through the set lookup and increment the counter of an individual rule. jhash being seeded with a random value, it is not deterministic which ports will incorrectly hash, but in testing with 5 ports in the set I always had 4 or 5 with an incorrect hash value. Signed-off-by: Anatole Denis Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_hash.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 0fa01d772c5e..9c0d5a7ce5f9 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -643,7 +643,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, { if (desc->size) { switch (desc->klen) { - case 2: case 4: return &nft_hash_fast_ops; default: From 7400bb4b5800831581a82f71700af6a5e815c3c8 Mon Sep 17 00:00:00 2001 From: Tejaswi Tanikella Date: Mon, 23 Oct 2017 11:46:03 +0530 Subject: [PATCH 02/10] netfilter: nf_reject_ipv4: Fix use-after-free in send_reset niph is not updated after pskb_expand_head changes the skb head. It still points to the freed data, which is then used to update tot_len and checksum. This could cause use-after-free poison crash. Update niph, if ip_route_me_harder does not fail. This only affects the interaction with REJECT targets and br_netfilter. Signed-off-by: Tejaswi Tanikella Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_reject_ipv4.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index eeacbdaf7cdf..5cd06ba3535d 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; + niph = ip_hdr(nskb); + /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; From 4eebff27ca4182bbf5f039dd60d79e2d7c0a707e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 1 Nov 2017 16:32:15 +0300 Subject: [PATCH 03/10] tcp_nv: fix division by zero in tcpnv_acked() Average RTT could become zero. This happened in real life at least twice. This patch treats zero as 1us. Signed-off-by: Konstantin Khlebnikov Acked-by: Lawrence Brakmo Signed-off-by: David S. Miller --- net/ipv4/tcp_nv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 1ff73982e28c..125fc1450b01 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -252,7 +252,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) /* rate in 100's bits per second */ rate64 = ((u64)sample->in_flight) * 8000000; - rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); + rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100); /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be From 18129a24983906eaf2a2d448ce4b83e27091ebe2 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Wed, 1 Nov 2017 14:58:09 +0000 Subject: [PATCH 04/10] net: vrf: correct FRA_L3MDEV encode type FRA_L3MDEV is defined as U8, but is being added as a U32 attribute. On big endian architecture, this results in the l3mdev entry not being added to the FIB rules. Fixes: 1aa6c4f6b8cd8 ("net: vrf: Add l3mdev rules on first device create") Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9b243e6f3008..7dc3bcac3506 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1165,7 +1165,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) frh->family = family; frh->action = FR_ACT_TO_TBL; - if (nla_put_u32(skb, FRA_L3MDEV, 1)) + if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) From a159d3c4b8291998c018f0dbddd4678315264a1e Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 1 Nov 2017 10:23:49 -0700 Subject: [PATCH 05/10] net_sched: acquire RTNL in tc_action_net_exit() I forgot to acquire RTNL in tc_action_net_exit() which leads that action ops->cleanup() is not always called with RTNL. This usually is not a big deal because this function is called after all netns refcnt are gone, but given RTNL protects more than just actions, add it for safety and consistency. Also add an assertion to catch other potential bugs. Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Lucas Bates Tested-by: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 ++ net/sched/act_api.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/net/act_api.h b/include/net/act_api.h index b944e0eb93be..5072446d5f06 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -122,7 +122,9 @@ void tcf_idrinfo_destroy(const struct tc_action_ops *ops, static inline void tc_action_net_exit(struct tc_action_net *tn) { + rtnl_lock(); tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + rtnl_unlock(); kfree(tn->idrinfo); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da6fa82c98a8..8f2c63514956 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -86,6 +86,8 @@ int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; + ASSERT_RTNL(); + if (p) { if (bind) p->tcfa_bindcnt--; From ceffcc5e254b450e6159f173e4538215cebf1b59 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 1 Nov 2017 10:23:50 -0700 Subject: [PATCH 06/10] net_sched: hold netns refcnt for each action TC actions have been destroyed asynchronously for a long time, previously in a RCU callback and now in a workqueue. If we don't hold a refcnt for its netns, we could use the per netns data structure, struct tcf_idrinfo, after it has been freed by netns workqueue. Hold refcnt to ensure netns destroy happens after all actions are gone. Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Lucas Bates Tested-by: Lucas Bates Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 4 +++- net/sched/act_api.c | 2 ++ net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 2 +- net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 4 ++-- net/sched/act_mirred.c | 2 +- net/sched/act_nat.c | 2 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 2 +- net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 2 +- net/sched/act_skbmod.c | 2 +- net/sched/act_tunnel_key.c | 2 +- net/sched/act_vlan.c | 2 +- 18 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 5072446d5f06..c68551255032 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -13,6 +13,7 @@ struct tcf_idrinfo { spinlock_t lock; struct idr action_idr; + struct net *net; }; struct tc_action_ops; @@ -104,7 +105,7 @@ struct tc_action_net { static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, struct net *net) { int err = 0; @@ -112,6 +113,7 @@ int tc_action_net_init(struct tc_action_net *tn, if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; + tn->idrinfo->net = net; spin_lock_init(&tn->idrinfo->lock); idr_init(&tn->idrinfo->action_idr); return err; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8f2c63514956..ca2ff0b3123f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -78,6 +78,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) spin_lock_bh(&idrinfo->lock); idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); + put_net(idrinfo->net); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } @@ -336,6 +337,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); + get_net(idrinfo->net); *a = p; return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c0c707eb2c96..9bce8cc84cbb 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -398,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops); + return tc_action_net_init(tn, &act_bpf_ops, net); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 10b7a8855a6c..34e52d01a5dd 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops); + return tc_action_net_init(tn, &act_connmark_ops, net); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 1c40caadcff9..35171df2ebef 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -626,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops); + return tc_action_net_init(tn, &act_csum_ops, net); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e29a48ef7fc3..ef7f7f39d26d 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops); + return tc_action_net_init(tn, &act_gact_ops, net); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 8ccd35825b6b..f65e4b5058e0 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -818,7 +818,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops); + return tc_action_net_init(tn, &act_ife_ops, net); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d9e399a7e3d5..dbdf3b2470d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops); + return tc_action_net_init(tn, &act_ipt_ops, net); } static void __net_exit ipt_exit_net(struct net *net) @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops); + return tc_action_net_init(tn, &act_xt_ops, net); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 416627c66f08..84759cfd5a33 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops); + return tc_action_net_init(tn, &act_mirred_ops, net); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c365d01b99c8..7eeaaf9217b6 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops); + return tc_action_net_init(tn, &act_nat_ops, net); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 491fe5deb09e..b3d82c334a5f 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -450,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops); + return tc_action_net_init(tn, &act_pedit_ops, net); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 3bb2ebf9e9ae..9ec42b26e4b9 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops); + return tc_action_net_init(tn, &act_police_ops, net); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 8b5abcd2f32f..e69a1e3a39bf 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -240,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops); + return tc_action_net_init(tn, &act_sample_ops, net); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index e7b57e5071a3..a8d0ea95f894 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops); + return tc_action_net_init(tn, &act_simp_ops, net); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 59949d61f20d..fbac62472e09 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -238,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops); + return tc_action_net_init(tn, &act_skbedit_ops, net); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index b642ad3d39dd..8e12d8897d2f 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -263,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops); + return tc_action_net_init(tn, &act_skbmod_ops, net); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 30c96274c638..c33faa373cf2 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -322,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops); + return tc_action_net_init(tn, &act_tunnel_key_ops, net); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 16eb067a8d8f..115fc33cc6d8 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -269,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops); + return tc_action_net_init(tn, &act_vlan_ops, net); } static void __net_exit vlan_exit_net(struct net *net) From e73b49ebd9deeb0ff2cd9ac6fd9c72a433d1e062 Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Thu, 2 Nov 2017 12:52:13 +0530 Subject: [PATCH 07/10] stmmac: use of_property_read_u32 instead of read_u8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Numbers in DT are stored in “cells” which are 32-bits in size. of_property_read_u8 does not work properly because of endianness problem. This causes it to always return 0 with little-endian architectures. Fix it by using of_property_read_u32() OF API. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller --- .../ethernet/stmicro/stmmac/stmmac_platform.c | 16 ++++++++-------- include/linux/stmmac.h | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 6383695004a5..195eb7e71473 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -168,8 +168,8 @@ static void stmmac_mtl_setup(struct platform_device *pdev, } /* Processing RX queues common config */ - if (of_property_read_u8(rx_node, "snps,rx-queues-to-use", - &plat->rx_queues_to_use)) + if (of_property_read_u32(rx_node, "snps,rx-queues-to-use", + &plat->rx_queues_to_use)) plat->rx_queues_to_use = 1; if (of_property_read_bool(rx_node, "snps,rx-sched-sp")) @@ -191,8 +191,8 @@ static void stmmac_mtl_setup(struct platform_device *pdev, else plat->rx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; - if (of_property_read_u8(q_node, "snps,map-to-dma-channel", - &plat->rx_queues_cfg[queue].chan)) + if (of_property_read_u32(q_node, "snps,map-to-dma-channel", + &plat->rx_queues_cfg[queue].chan)) plat->rx_queues_cfg[queue].chan = queue; /* TODO: Dynamic mapping to be included in the future */ @@ -222,8 +222,8 @@ static void stmmac_mtl_setup(struct platform_device *pdev, } /* Processing TX queues common config */ - if (of_property_read_u8(tx_node, "snps,tx-queues-to-use", - &plat->tx_queues_to_use)) + if (of_property_read_u32(tx_node, "snps,tx-queues-to-use", + &plat->tx_queues_to_use)) plat->tx_queues_to_use = 1; if (of_property_read_bool(tx_node, "snps,tx-sched-wrr")) @@ -244,8 +244,8 @@ static void stmmac_mtl_setup(struct platform_device *pdev, if (queue >= plat->tx_queues_to_use) break; - if (of_property_read_u8(q_node, "snps,weight", - &plat->tx_queues_cfg[queue].weight)) + if (of_property_read_u32(q_node, "snps,weight", + &plat->tx_queues_cfg[queue].weight)) plat->tx_queues_cfg[queue].weight = 0x10 + queue; if (of_property_read_bool(q_node, "snps,dcb-algorithm")) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 108739ff9223..32feac5bbd75 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -126,14 +126,14 @@ struct stmmac_axi { struct stmmac_rxq_cfg { u8 mode_to_use; - u8 chan; + u32 chan; u8 pkt_route; bool use_prio; u32 prio; }; struct stmmac_txq_cfg { - u8 weight; + u32 weight; u8 mode_to_use; /* Credit Base Shaper parameters */ u32 send_slope; @@ -168,8 +168,8 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u8 rx_queues_to_use; - u8 tx_queues_to_use; + u32 rx_queues_to_use; + u32 tx_queues_to_use; u8 rx_sched_algorithm; u8 tx_sched_algorithm; struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; From 25dd169aea6553aea548197a5d4580bbdeda1c85 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Nov 2017 16:02:20 +0100 Subject: [PATCH 08/10] fib: fib_dump_info can no longer use __in_dev_get_rtnl syzbot reported yet another regression added with DOIT_UNLOCKED. When nexthop is marked as dead, fib_dump_info uses __in_dev_get_rtnl(): ./include/linux/inetdevice.h:230 suspicious rcu_dereference_protected() usage! rcu_scheduler_active = 2, debug_locks = 1 1 lock held by syz-executor2/23859: #0: (rcu_read_lock){....}, at: [] inet_rtm_getroute+0xaa0/0x2d70 net/ipv4/route.c:2738 [..] lockdep_rcu_suspicious+0x123/0x170 kernel/locking/lockdep.c:4665 __in_dev_get_rtnl include/linux/inetdevice.h:230 [inline] fib_dump_info+0x1136/0x13d0 net/ipv4/fib_semantics.c:1377 inet_rtm_getroute+0xf97/0x2d70 net/ipv4/route.c:2785 .. This isn't safe anymore, callers either hold RTNL mutex or rcu read lock, so these spots must use rcu_dereference_rtnl() or plain rcu_derefence() (plus unconditional rcu read lock). This does the latter. Fixes: 394f51abb3d04f ("ipv4: route: set ipv4 RTM_GETROUTE to not use rtnl") Reported-by: syzbot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 57a5d48acee8..01ed22139ac2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1365,8 +1365,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { - struct in_device *in_dev; - if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; @@ -1374,10 +1372,14 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtm->rtm_flags |= RTNH_F_DEAD; + rcu_read_unlock(); } if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; @@ -1400,18 +1402,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { - struct in_device *in_dev; - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; if (nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rtnl(nh->nh_dev); + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtnh->rtnh_flags |= RTNH_F_DEAD; + rcu_read_unlock(); } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; From 3b11775033dc87c3d161996c54507b15ba26414a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Nov 2017 12:30:25 -0700 Subject: [PATCH 09/10] tcp: do not mangle skb->cb[] in tcp_make_synack() Christoph Paasch sent a patch to address the following issue : tcp_make_synack() is leaving some TCP private info in skb->cb[], then send the packet by other means than tcp_transmit_skb() tcp_transmit_skb() makes sure to clear skb->cb[] to not confuse IPv4/IPV6 stacks, but we have no such cleanup for SYNACK. tcp_make_synack() should not use tcp_init_nondata_skb() : tcp_init_nondata_skb() really should be limited to skbs put in write/rtx queues (the ones that are only sent via tcp_transmit_skb()) This patch fixes the issue and should even save few cpu cycles ;) Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses") Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Reviewed-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 823003eef3a2..478909f4694d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3180,13 +3180,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; skb->mark = ireq->ir_mark; - /* Setting of flags are superfluous here for callers (and ECE is - * not even correctly set) - */ - tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, - TCPHDR_SYN | TCPHDR_ACK); - - th->seq = htonl(TCP_SKB_CB(skb)->seq); + skb->ip_summed = CHECKSUM_PARTIAL; + th->seq = htonl(tcp_rsk(req)->snt_isn); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); From 93824c80bf47ebe087414b3a40ca0ff9aab7d1fb Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 2 Nov 2017 16:08:40 -0700 Subject: [PATCH 10/10] net: systemport: Correct IPG length settings Due to a documentation mistake, the IPG length was set to 0x12 while it should have been 12 (decimal). This would affect short packet (64B typically) performance since the IPG was bigger than necessary. Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 83eec9a8c275..eb441e5e2cd8 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1809,15 +1809,17 @@ static inline void bcm_sysport_mask_all_intrs(struct bcm_sysport_priv *priv) static inline void gib_set_pad_extension(struct bcm_sysport_priv *priv) { - u32 __maybe_unused reg; + u32 reg; - /* Include Broadcom tag in pad extension */ + reg = gib_readl(priv, GIB_CONTROL); + /* Include Broadcom tag in pad extension and fix up IPG_LENGTH */ if (netdev_uses_dsa(priv->netdev)) { - reg = gib_readl(priv, GIB_CONTROL); reg &= ~(GIB_PAD_EXTENSION_MASK << GIB_PAD_EXTENSION_SHIFT); reg |= ENET_BRCM_TAG_LEN << GIB_PAD_EXTENSION_SHIFT; - gib_writel(priv, reg, GIB_CONTROL); } + reg &= ~(GIB_IPG_LEN_MASK << GIB_IPG_LEN_SHIFT); + reg |= 12 << GIB_IPG_LEN_SHIFT; + gib_writel(priv, reg, GIB_CONTROL); } static int bcm_sysport_open(struct net_device *dev)