diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 13ad434643b8..8ab0b5a8dfef 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -470,13 +470,14 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ - fl6.flowi6_l3mdev = dev->ifindex; + fl6.flowi6_oif = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) @@ -549,10 +550,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ - fl4.flowi4_l3mdev = vrf_dev->ifindex; + fl4.flowi4_oif = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_tos = RT_TOS(ip4h->tos); - fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; diff --git a/include/net/flow.h b/include/net/flow.h index 1acce55a9b46..39d0cedcddee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -29,7 +29,6 @@ struct flowi_tunnel { struct flowi_common { int flowic_oif; int flowic_iif; - int flowic_l3mdev; __u32 flowic_mark; __u8 flowic_tos; __u8 flowic_scope; @@ -37,6 +36,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; struct flowi_tunnel flowic_tun_key; @@ -71,7 +71,6 @@ struct flowi4 { struct flowi_common __fl_common; #define flowi4_oif __fl_common.flowic_oif #define flowi4_iif __fl_common.flowic_iif -#define flowi4_l3mdev __fl_common.flowic_l3mdev #define flowi4_mark __fl_common.flowic_mark #define flowi4_tos __fl_common.flowic_tos #define flowi4_scope __fl_common.flowic_scope @@ -105,7 +104,6 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, { fl4->flowi4_oif = oif; fl4->flowi4_iif = LOOPBACK_IFINDEX; - fl4->flowi4_l3mdev = 0; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; @@ -136,7 +134,6 @@ struct flowi6 { struct flowi_common __fl_common; #define flowi6_oif __fl_common.flowic_oif #define flowi6_iif __fl_common.flowic_iif -#define flowi6_l3mdev __fl_common.flowic_l3mdev #define flowi6_mark __fl_common.flowic_mark #define flowi6_scope __fl_common.flowic_scope #define flowi6_proto __fl_common.flowic_proto @@ -183,7 +180,6 @@ struct flowi { } u; #define flowi_oif u.__fl_common.flowic_oif #define flowi_iif u.__fl_common.flowic_iif -#define flowi_l3mdev u.__fl_common.flowic_l3mdev #define flowi_mark u.__fl_common.flowic_mark #define flowi_tos u.__fl_common.flowic_tos #define flowi_scope u.__fl_common.flowic_scope diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 5e2a003cd83c..0394146f813c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); struct flowi4 fl4 = { .flowi4_iif = LOOPBACK_IFINDEX, - .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), + .flowi4_oif = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, .flowi4_scope = scope, @@ -352,8 +352,9 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); - fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; + fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); + if (!fl4.flowi4_iif) + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ea17da3b9735..5f786cff2e41 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -2267,7 +2267,7 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb) { - if (fl4->flowi4_oif) + if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) goto check_saddr; #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 0f0da157989e..456240d2adc1 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1384,8 +1384,11 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) return false; - if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) - return false; + if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { + if (flp->flowi4_oif && + flp->flowi4_oif != nhc->nhc_oif) + return false; + } return true; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 18eef4417913..273177f5076d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2191,7 +2191,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; @@ -2664,7 +2663,8 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && - (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { + (ipv4_is_multicast(fl4->daddr) || + !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4548a91acdc8..9ebd54752e03 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -28,11 +28,13 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, memset(fl4, 0, sizeof(*fl4)); fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; - fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; + fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; + rt = __ip_route_output_key(net, fl4); if (!IS_ERR(rt)) return &rt->dst; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 26d8105981e9..436733021b1e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1067,7 +1067,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, #ifdef CONFIG_IPV6_SUBTREES ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { dst_release(dst); dst = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 89aea829374e..b9b21f612a1f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1207,6 +1207,9 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_node *fn; struct rt6_info *rt; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + flags &= ~RT6_LOOKUP_F_IFACE; + rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: @@ -2180,6 +2183,9 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + oif = 0; + redo_rt6_select: rt6_select(net, fn, oif, res, strict); if (res->f6i == net->ipv6.fib6_null_entry) { @@ -2926,6 +2932,12 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_info *rt; struct fib6_node *fn; + /* l3mdev_update_flow overrides oif if the device is enslaved; in + * this case we must match on the real ingress device, so reset it + */ + if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) + fl6->flowi6_oif = skb->dev->ifindex; + /* Get the "current" route for this destination and * check if the redirect has come from appropriate router. * diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 492b9692c0dc..7c903e0e446c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -33,7 +33,8 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, int err; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); + fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; fl6.flowi6_mark = mark; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 42794581762c..f2c3a61ad134 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -249,19 +249,25 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct net_device *dev; int rc = 0; - /* update flow ensures flowi_l3mdev is set when relevant */ - if (!fl->flowi_l3mdev) - return 0; - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); + dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; + goto out; } + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_fib_table) { + arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); + rc = 1; + goto out; + } + +out: rcu_read_unlock(); return rc; @@ -270,28 +276,31 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, void l3mdev_update_flow(struct net *net, struct flowi *fl) { struct net_device *dev; + int ifindex; rcu_read_lock(); if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { - if (!fl->flowi_l3mdev) - fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); - - /* oif set to L3mdev directs lookup to its table; - * reset to avoid oif match in fib_lookup - */ - if (netif_is_l3_master(dev)) - fl->flowi_oif = 0; - goto out; + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } } } - if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { + if (fl->flowi_iif) { dev = dev_get_by_index_rcu(net, fl->flowi_iif); - if (dev) - fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } } out: diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index acffe0029fdd..e13b0fb63333 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -741,7 +741,7 @@ ipv4_ping_vrf() log_start show_hint "Fails since address on vrf device is out of device scope" run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} - log_test_addr ${a} $? 2 "ping local, device bind" + log_test_addr ${a} $? 1 "ping local, device bind" done #