net: RPS: Enable hardware acceleration of RFS
Allow drivers for multiqueue hardware with flow filter tables to
accelerate RFS.  The driver must:

1. Set net_device::rx_cpu_rmap to a cpu_rmap of the RX completion
   IRQs (in queue order).  This will provide a mapping from CPUs to
   the queues for which completions are handled nearest to them.

2. Implement net_device_ops::ndo_rx_flow_steer.  This operation adds
   or replaces a filter steering the given flow to the given RX queue,
   if possible.

3. Periodically remove filters for which rps_may_expire_flow() returns
   true.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c39649c331
commit c445477d74
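Before the diff, a hedged driver-side sketch of steps 1 and 2 above, assuming a multiqueue NIC whose RX completion IRQ numbers are already known. Every my_*() name and the filter-table internals are hypothetical; only the cpu_rmap calls (from the parent commit) and the ndo_rx_flow_steer hook (from this patch) are real. Step 3 is sketched after the diff.

#include <linux/cpu_rmap.h>
#include <linux/netdevice.h>

/* Step 1: build the CPU reverse-map from the RX completion IRQs,
 * in queue order, and hang it off the net_device. */
static int my_setup_rfs_accel(struct net_device *dev,
			      const int *rxq_irq, unsigned int n_rxq)
{
	unsigned int i;
	int rc;

	dev->rx_cpu_rmap = alloc_irq_cpu_rmap(n_rxq);
	if (!dev->rx_cpu_rmap)
		return -ENOMEM;

	for (i = 0; i < n_rxq; i++) {
		rc = irq_cpu_rmap_add(dev->rx_cpu_rmap, rxq_irq[i]);
		if (rc) {
			free_irq_cpu_rmap(dev->rx_cpu_rmap);
			dev->rx_cpu_rmap = NULL;
			return rc;
		}
	}
	return 0;
}

/* Step 2: add or replace a hardware filter steering the flow in skb
 * to rxq_index.  my_insert_filter() stands in for device-specific
 * filter-table code; it returns the new filter ID or a negative errno,
 * which is exactly the contract ndo_rx_flow_steer expects. */
static int my_rx_flow_steer(struct net_device *dev,
			    const struct sk_buff *skb,
			    u16 rxq_index, u32 flow_id)
{
	return my_insert_filter(dev, skb, rxq_index, flow_id);
}

static const struct net_device_ops my_netdev_ops = {
	/* ...the usual ops... */
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= my_rx_flow_steer,
#endif
};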
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -554,14 +554,16 @@ struct rps_map {
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
 /*
- * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
- * tail pointer for that CPU's input queue at the time of last enqueue.
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
+ * tail pointer for that CPU's input queue at the time of last enqueue, and
+ * a hardware filter index.
  */
 struct rps_dev_flow {
 	u16 cpu;
-	u16 fill;
+	u16 filter;
 	unsigned int last_qtail;
 };
+#define RPS_NO_FILTER 0xffff
 
 /*
  * The rps_dev_flow_table structure contains a table of flow mappings.
@@ -611,6 +613,11 @@ static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
 
 extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
 
+#ifdef CONFIG_RFS_ACCEL
+extern bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+				u32 flow_id, u16 filter_id);
+#endif
+
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
 	struct rps_map __rcu *rps_map;
@@ -769,6 +776,13 @@ struct netdev_tc_txq {
  * is always called from the stack with the rtnl lock held and netif tx
  * queues stopped. This allows the netdevice to perform queue management
  * safely.
+ *
+ * RFS acceleration.
+ * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb,
+ *			    u16 rxq_index, u32 flow_id);
+ *	Set hardware filter for RFS.  rxq_index is the target queue index;
+ *	flow_id is a flow ID to be passed to rps_may_expire_flow() later.
+ *	Return the filter ID on success, or a negative error code.
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -842,6 +856,12 @@ struct net_device_ops {
 	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
 						    u64 *wwn, int type);
 #endif
+#ifdef CONFIG_RFS_ACCEL
+	int			(*ndo_rx_flow_steer)(struct net_device *dev,
+						     const struct sk_buff *skb,
+						     u16 rxq_index,
+						     u32 flow_id);
+#endif
 };
 
 /*
@@ -1056,6 +1076,13 @@ struct net_device {
 
 	/* Number of RX queues currently active in device */
 	unsigned int		real_num_rx_queues;
+
+#ifdef CONFIG_RFS_ACCEL
+	/* CPU reverse-mapping for RX completion interrupts, indexed
+	 * by RX queue number.  Assigned by driver.  This must only be
+	 * set if the ndo_rx_flow_steer operation is defined. */
+	struct cpu_rmap		*rx_cpu_rmap;
+#endif
 #endif
 
 	rx_handler_func_t __rcu	*rx_handler;
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -221,6 +221,12 @@ config RPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config RFS_ACCEL
+	boolean
+	depends on RPS && GENERIC_HARDIRQS
+	select CPU_RMAP
+	default y
+
 config XPS
 	boolean
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
+#include <linux/cpu_rmap.h>
 
 #include "net-sysfs.h"
 
@@ -2588,6 +2589,53 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+static struct rps_dev_flow *
+set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+	    struct rps_dev_flow *rflow, u16 next_cpu)
+{
+	u16 tcpu;
+
+	tcpu = rflow->cpu = next_cpu;
+	if (tcpu != RPS_NO_CPU) {
+#ifdef CONFIG_RFS_ACCEL
+		struct netdev_rx_queue *rxqueue;
+		struct rps_dev_flow_table *flow_table;
+		struct rps_dev_flow *old_rflow;
+		u32 flow_id;
+		u16 rxq_index;
+		int rc;
+
+		/* Should we steer this flow to a different hardware queue? */
+		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap)
+			goto out;
+		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
+		if (rxq_index == skb_get_rx_queue(skb))
+			goto out;
+
+		rxqueue = dev->_rx + rxq_index;
+		flow_table = rcu_dereference(rxqueue->rps_flow_table);
+		if (!flow_table)
+			goto out;
+		flow_id = skb->rxhash & flow_table->mask;
+		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
+							rxq_index, flow_id);
+		if (rc < 0)
+			goto out;
+		old_rflow = rflow;
+		rflow = &flow_table->flows[flow_id];
+		rflow->cpu = next_cpu;
+		rflow->filter = rc;
+		if (old_rflow->filter == rflow->filter)
+			old_rflow->filter = RPS_NO_FILTER;
+	out:
+#endif
+		rflow->last_qtail =
+			per_cpu(softnet_data, tcpu).input_queue_head;
+	}
+
+	return rflow;
+}
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2658,12 +2706,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		if (unlikely(tcpu != next_cpu) &&
 		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
 		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-		      rflow->last_qtail)) >= 0)) {
-			tcpu = rflow->cpu = next_cpu;
-			if (tcpu != RPS_NO_CPU)
-				rflow->last_qtail = per_cpu(softnet_data,
-				    tcpu).input_queue_head;
-		}
+		      rflow->last_qtail)) >= 0))
+			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+
 		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
 			*rflowp = rflow;
 			cpu = tcpu;
@@ -2684,6 +2729,46 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	return cpu;
 }
 
+#ifdef CONFIG_RFS_ACCEL
+
+/**
+ * rps_may_expire_flow - check whether an RFS hardware filter may be removed
+ * @dev: Device on which the filter was set
+ * @rxq_index: RX queue index
+ * @flow_id: Flow ID passed to ndo_rx_flow_steer()
+ * @filter_id: Filter ID returned by ndo_rx_flow_steer()
+ *
+ * Drivers that implement ndo_rx_flow_steer() should periodically call
+ * this function for each installed filter and remove the filters for
+ * which it returns %true.
+ */
+bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
+			 u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_dev_flow *rflow;
+	bool expire = true;
+	int cpu;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = ACCESS_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
+		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
+			   rflow->last_qtail) <
+		     (int)(10 * flow_table->mask)))
+			expire = false;
+	}
+	rcu_read_unlock();
+	return expire;
+}
+EXPORT_SYMBOL(rps_may_expire_flow);
+
+#endif /* CONFIG_RFS_ACCEL */
+
 /* Called from hardirq (IPI) context */
 static void rps_trigger_softirq(void *data)
 {
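Finally, a hedged sketch of step 3, built on the rps_may_expire_flow() helper added above. That helper keeps a filter alive only while fewer than 10 * flow_table->mask packets have been enqueued on the owning CPU since the flow's last packet, so the driver merely has to offer each installed filter back periodically. The my_*() types and helpers here are hypothetical:

/* Step 3: periodically ask the stack whether each installed filter
 * may be expired, and remove the ones it no longer needs. */
static void my_rfs_expire_work(struct work_struct *work)
{
	struct my_nic *nic = container_of(work, struct my_nic,
					  rfs_expire_work.work);
	struct my_filter *f, *tmp;

	list_for_each_entry_safe(f, tmp, &nic->rfs_filters, list)
		if (rps_may_expire_flow(nic->netdev, f->rxq_index,
					f->flow_id, f->filter_id))
			my_remove_filter(nic, f);	/* frees f */

	/* Re-arm; drivers typically bound the scan (e.g. a fixed quota
	 * of table entries per pass) rather than walking every filter
	 * at once. */
	schedule_delayed_work(&nic->rfs_expire_work, HZ / 4);
}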