Merge branch 'nvme-5.8' of git://git.infradead.org/nvme into for-5.8/drivers

Pull NVMe updates from Christoph:

"The second large batch of nvme updates:

 - t10 protection information support for nvme-rdma and nvmet-rdma
   (Israel Rukshin and Max Gurtovoy)
 - target side AEN improvements (Chaitanya Kulkarni)
 - various fixes and minor improvements all over, including the nvme part
   of the lpfc driver"

* 'nvme-5.8' of git://git.infradead.org/nvme: (38 commits)
  lpfc: Fix return value in __lpfc_nvme_ls_abort
  lpfc: fix axchg pointer reference after free and double frees
  lpfc: Fix pointer checks and comments in LS receive refactoring
  nvme: set dma alignment to qword
  nvmet: cleanups the loop in nvmet_async_events_process
  nvmet: fix memory leak when removing namespaces and controllers concurrently
  nvmet-rdma: add metadata/T10-PI support
  nvmet: add metadata support for block devices
  nvmet: add metadata/T10-PI support
  nvme: add Metadata Capabilities enumerations
  nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
  nvmet: rename nvmet_rw_len to nvmet_rw_data_len
  nvmet: add metadata characteristics for a namespace
  nvme-rdma: add metadata/T10-PI support
  nvme-rdma: introduce nvme_rdma_sgl structure
  nvme: introduce NVME_INLINE_METADATA_SG_CNT
  nvme: enforce extended LBA format for fabrics metadata
  nvme: introduce max_integrity_segments ctrl attribute
  nvme: make nvme_ns_has_pi accessible to transports
  nvme: introduce NVME_NS_METADATA_SUPPORTED flag
  ...
This commit is contained in:
Jens Axboe
2020-05-27 05:17:10 -06:00
24 changed files with 1116 additions and 222 deletions

View File

@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
nvme_put_ctrl(ctrl);
}
/*
 * True when the namespace has a non-zero protection information type and
 * its metadata size equals the size of one struct t10_pi_tuple.
 */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
static blk_status_t nvme_error_status(u16 status)
{
switch (status & 0x7ff) {
@ -552,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
return ret;
goto out_disable_stream;
ctrl->nssa = le16_to_cpu(s.nssa);
if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
ctrl->nssa);
nvme_disable_streams(ctrl);
return 0;
goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
out_disable_stream:
nvme_disable_streams(ctrl);
return ret;
}
/*
@ -1302,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
if (ns->ext) {
if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
@ -1696,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
u32 max_integrity_segments)
{
struct blk_integrity integrity;
@ -1719,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
}
integrity.tuple_size = ms;
blk_integrity_register(disk, &integrity);
blk_queue_max_integrity_segments(disk->queue, 1);
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@ -1842,7 +1841,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
u32 atomic_bs, phys_bs, io_opt = 0;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@ -1851,7 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
atomic_bs = phys_bs = io_opt = bs;
atomic_bs = phys_bs = bs;
nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
@ -1882,13 +1881,28 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type);
if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
ns->lba_shift > PAGE_SHIFT)
/*
* The block layer can't support LBA sizes larger than the page size
* yet, so catch this early and don't allow block I/O.
*/
if (ns->lba_shift > PAGE_SHIFT)
capacity = 0;
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
if (ns->ms) {
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
(ns->features & NVME_NS_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type,
ns->ctrl->max_integrity_segments);
else if (!nvme_ns_has_pi(ns))
capacity = 0;
}
set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
@ -1902,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;
/*
@ -1915,20 +1930,43 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->lba_shift == 0)
ns->lba_shift = 9;
if ((ns->ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ns->ctrl->max_hw_sectors))
iob = ns->ctrl->max_hw_sectors;
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
ns->pi_type = 0;
if (ns->ms) {
/*
* For PCIe only the separate metadata pointer is supported,
* as the block layer supplies metadata in a separate bio_vec
* chain. For Fabrics, only metadata as part of extended data
* LBA is supported on the wire per the Fabrics specification,
* but the HBA/HCA will do the remapping from the separate
* metadata buffers for us.
*/
if (id->flbas & NVME_NS_FLBAS_META_EXT) {
ns->features |= NVME_NS_EXT_LBAS;
if ((ctrl->ops->flags & NVME_F_FABRICS) &&
(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
ctrl->max_integrity_segments)
ns->features |= NVME_NS_METADATA_SUPPORTED;
} else {
if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
return -EINVAL;
if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
ns->features |= NVME_NS_METADATA_SUPPORTED;
}
}
if (iob)
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
@ -1939,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
revalidate_disk(ns->head->disk);
}
#endif
return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
@ -1974,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto free_id;
}
__nvme_revalidate_disk(disk, id);
ret = __nvme_revalidate_disk(disk, id);
free_id:
kfree(id);
out:
@ -2283,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
blk_queue_virt_boundary(q, ctrl->page_size - 1);
blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
@ -3628,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
__nvme_revalidate_disk(disk, id);
if (__nvme_revalidate_disk(disk, id))
goto out_free_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@ -3655,6 +3696,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
out_free_disk:
del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);

View File

@ -108,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
struct scatterlist sgl[NVME_INLINE_SG_CNT];
uint8_t priv[0];
uint8_t priv[];
};
struct nvme_fc_lport {
@ -3246,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity.\n",
ctrl->cnum, portptr->dev_loss_tmo);
ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
(ctrl->ctrl.opts->max_reconnects *
ctrl->ctrl.opts->reconnect_delay)));
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}

View File

@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
__le32 tdresv;
__le32 thresv;
__le32 rsvd2[8];
__u8 blk[0];
__u8 blk[];
};
struct nvme_nvm_id20_addrf {
@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo = &dev->geo;
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
geo->ext = ns->ext;
if (ns->features & NVME_NS_EXT_LBAS)
geo->ext = true;
else
geo->ext = false;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q;

View File

@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
#include <linux/t10-pi.h>
#include <trace/events/block.h>
@ -30,8 +31,10 @@ extern unsigned int admin_timeout;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
#define NVME_INLINE_METADATA_SG_CNT 0
#else
#define NVME_INLINE_SG_CNT 2
#define NVME_INLINE_METADATA_SG_CNT 1
#endif
extern struct workqueue_struct *nvme_wq;
@ -228,6 +231,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
u16 crdt[3];
u16 oncs;
u16 oacs;
@ -364,6 +368,11 @@ struct nvme_ns_head {
#endif
};
/* Bit flags stored in the features field of struct nvme_ns. */
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
};
struct nvme_ns {
struct list_head list;
@ -383,8 +392,8 @@ struct nvme_ns {
u16 ms;
u16 sgs;
u32 sws;
bool ext;
u8 pi_type;
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
@ -394,6 +403,12 @@ struct nvme_ns {
};
/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
struct nvme_ctrl_ops {
const char *name;
struct module *module;
@ -497,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

View File

@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
/*
 * Setter used by io_queue_count_ops.
 *
 * The string must parse as a base-10 unsigned integer that does not exceed
 * num_possible_cpus(); anything else is rejected with -EINVAL. Accepted
 * values are then stored through param_set_uint().
 */
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
	unsigned int count;

	if (kstrtouint(val, 10, &count))
		return -EINVAL;
	if (count > num_possible_cpus())
		return -EINVAL;

	return param_set_uint(val, kp);
}
/*
 * Parameter operations pairing the range-checked io_queue_count_set()
 * setter with the stock param_get_uint() getter.
 */
static const struct kernel_param_ops io_queue_count_ops = {
.set = io_queue_count_set,
.get = param_get_uint,
};
static unsigned int write_queues;
module_param(write_queues, uint, 0644);
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@ -2549,6 +2565,12 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
/*
* We do not support an SGL for metadata (yet), so we are limited to a
* single integrity segment for the separate metadata pointer.
*/
dev->ctrl.max_integrity_segments = 1;
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@ -3118,8 +3140,6 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
write_queues = min(write_queues, num_possible_cpus());
poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}

View File

@ -34,6 +34,11 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
#define NVME_RDMA_DATA_SGL_SIZE \
(sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
#define NVME_RDMA_METADATA_SGL_SIZE \
(sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)
struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
@ -48,6 +53,11 @@ struct nvme_rdma_qe {
u64 dma;
};
/* A scatterlist table plus the entry count recorded when a request is mapped. */
struct nvme_rdma_sgl {
/* entry count returned by blk_rq_map_sg() / blk_rq_map_integrity_sg() */
int nents;
/* table handed to sg_alloc_table_chained() / sg_free_table_chained() */
struct sg_table sg_table;
};
struct nvme_rdma_queue;
struct nvme_rdma_request {
struct nvme_request req;
@ -58,12 +68,12 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
struct sg_table sg_table;
struct scatterlist first_sgl[];
struct nvme_rdma_sgl data_sgl;
struct nvme_rdma_sgl *metadata_sgl;
bool use_sig_mr;
};
enum nvme_rdma_queue_flags {
@ -85,6 +95,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
bool pi_support;
};
struct nvme_rdma_ctrl {
@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
if (queue->pi_support)
init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
if (!req->sqe.data)
return -ENOMEM;
/* metadata nvme_rdma_sgl struct is located after command's data SGL */
if (queue->pi_support)
req->metadata_sgl = (void *)nvme_req(rq) +
sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE;
req->queue = queue;
return 0;
@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
dev = queue->device;
ibdev = dev->dev;
if (queue->pi_support)
ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
/*
@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
ibdev->attrs.max_fast_reg_page_list_len - 1);
u32 max_page_list_len;
if (pi_support)
max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
else
max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* misaligned we'll end up using two entries for a single data page,
* so one additional entry is required.
*/
pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
if (queue->pi_support) {
ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
queue->queue_size, IB_MR_TYPE_INTEGRITY,
pages_per_mr, pages_per_mr);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize PI MR pool sized %d for QID %d\n",
queue->queue_size, idx);
goto out_destroy_mr_pool;
}
}
set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
return 0;
out_destroy_mr_pool:
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
if (idx && ctrl->ctrl.max_integrity_segments)
queue->pi_support = true;
else
queue->pi_support = false;
init_completion(&queue->cm_done);
if (idx > 0)
@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
NVME_RDMA_DATA_SGL_SIZE;
if (nctrl->max_integrity_segments)
set->cmd_size += sizeof(struct nvme_rdma_sgl) +
NVME_RDMA_METADATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
bool pi_capable = false;
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
/* T10-PI support */
if (ctrl->device->dev->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)
pi_capable = true;
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
pi_capable);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.max_segments = ctrl->max_fr_pages;
ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
if (pi_capable)
ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
else
ctrl->ctrl.max_integrity_segments = 0;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
struct list_head *pool = &queue->qp->rdma_mrs;
if (!blk_rq_nr_phys_segments(rq))
return;
if (blk_integrity_rq(rq)) {
ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents, rq_dma_dir(rq));
sg_free_table_chained(&req->metadata_sgl->sg_table,
NVME_INLINE_METADATA_SG_CNT);
}
if (req->use_sig_mr)
pool = &queue->qp->sig_mrs;
if (req->mr) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
ib_mr_pool_put(queue->qp, pool, req->mr);
req->mr = NULL;
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
rq_dma_dir(rq));
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
int count)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
struct scatterlist *sgl = req->sg_table.sgl;
struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
return 0;
}
/*
 * Fill one signature domain with T10-DIF parameters.
 *
 * @bi:      block integrity profile; supplies the PI interval exponent
 * @cmd:     NVMe command; supplies reftag, apptag and appmask from cmd->rw
 * @domain:  signature domain to fill
 * @control: CPU-endian copy of the command's control field
 * @pi_type: protection information type of the namespace
 *
 * Only the fields assigned below are written; ref_remap and ref_escape are
 * left untouched when their condition does not hold.
 */
static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_domain *domain,
		u16 control, u8 pi_type)
{
	/* signature type, guard type and protection interval */
	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.bg_type = IB_T10DIF_CRC;
	domain->sig.dif.pi_interval = 1 << bi->interval_exp;

	/* application tag handling */
	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
	domain->sig.dif.app_escape = true;

	/* reference tag handling */
	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
	if (control & NVME_RW_PRINFO_PRCHK_REF)
		domain->sig.dif.ref_remap = true;
	if (pi_type == NVME_NS_DPS_PI_TYPE3)
		domain->sig.dif.ref_escape = true;
}
/*
 * Build the signature attributes for a command from its control field.
 *
 * @sig_attrs is zeroed first. The wire domain is always filled from the
 * command. When PRACT is not set the memory domain is filled the same way.
 * When PRACT is set the memory domain is marked IB_SIG_TYPE_NONE and the
 * PRACT bit is cleared in cmd->rw.control.
 */
static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
		u8 pi_type)
{
	const u16 control = le16_to_cpu(cmd->rw.control);

	memset(sig_attrs, 0, sizeof(*sig_attrs));

	/* the wire domain is described identically in both cases */
	nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, pi_type);

	if (!(control & NVME_RW_PRINFO_PRACT)) {
		/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
		nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
					 pi_type);
		return;
	}

	/* for WRITE_INSERT/READ_STRIP no memory domain */
	sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;

	/* Clear the PRACT bit since HCA will generate/verify the PI */
	cmd->rw.control = cpu_to_le16(control & ~NVME_RW_PRINFO_PRACT);
}
/*
 * Translate the PRCHK bits of the command's control field into an
 * IB_SIG_CHECK_* mask stored through @mask. Bits other than the reference
 * tag and guard checks are never set.
 */
static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
{
	const u16 control = le16_to_cpu(cmd->rw.control);
	u8 checks = 0;

	if (control & NVME_RW_PRINFO_PRCHK_REF)
		checks |= IB_SIG_CHECK_REFTAG;
	if (control & NVME_RW_PRINFO_PRCHK_GUARD)
		checks |= IB_SIG_CHECK_GUARD;

	*mask = checks;
}
static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
{
if (unlikely(wc->status != IB_WC_SUCCESS))
nvme_rdma_wr_error(cq, wc, "SIG");
}
/*
 * Map a request's data and metadata scatterlists onto an integrity MR and
 * describe the result in the command's keyed SGL descriptor.
 *
 * @count and @pi_count are the entry counts passed to ib_map_mr_sg_pi()
 * for the data and metadata scatterlists respectively.
 *
 * Returns 0 on success, -EAGAIN when no MR could be taken from the sig_mrs
 * pool, or, when ib_map_mr_sg_pi() returns non-zero, that value if it is
 * negative and -EINVAL otherwise. On a mapping failure the MR is returned
 * to the pool and req->mr is reset to NULL.
 */
static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req, struct nvme_command *c,
int count, int pi_count)
{
struct nvme_rdma_sgl *sgl = &req->data_sgl;
struct ib_reg_wr *wr = &req->reg_wr;
struct request *rq = blk_mq_rq_from_pdu(req);
struct nvme_ns *ns = rq->q->queuedata;
struct bio *bio = rq->bio;
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
int nr;
/* integrity MRs come from the dedicated sig_mrs pool */
req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
if (WARN_ON_ONCE(!req->mr))
return -EAGAIN;
nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
req->metadata_sgl->sg_table.sgl, pi_count, NULL,
SZ_4K);
/* any non-zero result is treated as failure here */
if (unlikely(nr))
goto mr_put;
/* derive signature domains and check mask from the command */
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
req->mr->sig_attrs, ns->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
/* bump the rkey before it is copied into the work request and the SGL */
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
req->reg_cqe.done = nvme_rdma_sig_done;
/* prepare the registration work request; it is not posted in this function */
memset(wr, 0, sizeof(*wr));
wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
wr->wr.wr_cqe = &req->reg_cqe;
wr->wr.num_sge = 0;
wr->wr.send_flags = 0;
wr->mr = req->mr;
wr->key = req->mr->rkey;
wr->access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
/* keyed SGL descriptor: address, length and key all come from the MR */
sg->addr = cpu_to_le64(req->mr->iova);
put_unaligned_le24(req->mr->length, sg->length);
put_unaligned_le32(req->mr->rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
mr_put:
/* give the MR back and normalise a positive nr to -EINVAL */
ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
req->mr = NULL;
if (nr < 0)
return nr;
return -EINVAL;
}
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
int pi_count = 0;
int count, ret;
req->num_sge = 1;
@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
req->data_sgl.sg_table.sgl);
count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_dma_dir(rq));
count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
req->data_sgl.nents, rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
}
if (blk_integrity_rq(rq)) {
req->metadata_sgl->sg_table.sgl =
(struct scatterlist *)(req->metadata_sgl + 1);
ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
blk_rq_count_integrity_sg(rq->q, rq->bio),
req->metadata_sgl->sg_table.sgl,
NVME_INLINE_METADATA_SG_CNT);
if (unlikely(ret)) {
ret = -ENOMEM;
goto out_unmap_sg;
}
req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
rq->bio, req->metadata_sgl->sg_table.sgl);
pi_count = ib_dma_map_sg(ibdev,
req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents,
rq_dma_dir(rq));
if (unlikely(pi_count <= 0)) {
ret = -EIO;
goto out_free_pi_table;
}
}
if (req->use_sig_mr) {
ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
goto out;
}
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
goto out_unmap_sg;
goto out_unmap_pi_sg;
return 0;
out_unmap_pi_sg:
if (blk_integrity_rq(rq))
ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents, rq_dma_dir(rq));
out_free_pi_table:
if (blk_integrity_rq(rq))
sg_free_table_chained(&req->metadata_sgl->sg_table,
NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
rq_dma_dir(rq));
out_free_table:
sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
queue->pi_support &&
(c->common.opcode == nvme_cmd_write ||
c->common.opcode == nvme_cmd_read) &&
nvme_ns_has_pi(ns))
req->use_sig_mr = true;
else
req->use_sig_mr = false;
err = nvme_rdma_map_data(queue, rq, c);
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
return ib_process_cq_direct(queue->ib_cq, -1);
}
/*
 * Query the signature status of the request's MR and record a matching
 * NVMe status on the request.
 *
 * If the status query itself fails the request status is set to
 * NVME_SC_INVALID_PI. If a signature failure is reported, guard, reference
 * tag and application tag errors are mapped to their NVMe check status; any
 * other error type leaves the status unchanged. The error is logged in
 * either case.
 */
static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
{
	struct request *rq = blk_mq_rq_from_pdu(req);
	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		nvme_req(rq)->status = NVME_SC_INVALID_PI;
		return;
	}

	/* nothing to report when no signature failure was flagged */
	if (!(mr_status.fail_status & IB_MR_CHECK_SIG_STATUS))
		return;

	if (mr_status.sig_err.err_type == IB_SIG_BAD_GUARD)
		nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
	else if (mr_status.sig_err.err_type == IB_SIG_BAD_REFTAG)
		nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
	else if (mr_status.sig_err.err_type == IB_SIG_BAD_APPTAG)
		nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;

	pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
	       mr_status.sig_err.err_type, mr_status.sig_err.expected,
	       mr_status.sig_err.actual);
}
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
if (req->use_sig_mr)
nvme_rdma_check_pi_status(req);
nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@ -1926,7 +2193,7 @@ out_fail:
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
.flags = NVME_F_FABRICS,
.flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,

View File

@ -885,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
if (last && !queue->data_digest)
flags |= MSG_EOR;
else
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
@ -924,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
bool inline_data = nvme_tcp_has_inline_data(req);
int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
int flags = MSG_DONTWAIT;
int ret;
if (inline_data)
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
@ -967,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
MSG_DONTWAIT | MSG_MORE);
MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (unlikely(ret <= 0))
return ret;

View File

@ -4,6 +4,7 @@ config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is

View File

@ -295,7 +295,7 @@ out:
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u32 cmd_capsule_size;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
req->port->inline_data_size) / 16);
/*
* Max command capsule size is sqe + in-capsule data size.
* Disable in-capsule data for Metadata capable controllers.
*/
cmd_capsule_size = sizeof(struct nvme_command);
if (!ctrl->pi_support)
cmd_capsule_size += req->port->inline_data_size;
id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
@ -465,6 +472,7 @@ out:
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
struct nvme_id_ns *id;
u16 status = 0;
@ -482,14 +490,11 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
/* return an all zeroed buffer if we can't find an active namespace */
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
goto done;
if (ns->bdev)
nvmet_bdev_ns_revalidate(ns);
else
nvmet_file_ns_revalidate(ns);
nvmet_ns_revalidate(ns);
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
@ -526,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->lbaf[0].ds = ns->blksize_shift;
if (ctrl->pi_support && nvmet_ns_has_pi(ns)) {
id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
NVME_NS_DPC_PI_TYPE3;
id->mc = NVME_MC_EXTENDED_LBA;
id->dps = ns->pi_type;
id->flbas = NVME_NS_FLBAS_META_EXT;
id->lbaf[0].ms = cpu_to_le16(ns->metadata_size);
}
if (ns->readonly)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
@ -630,7 +645,7 @@ out:
static void nvmet_execute_identify(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
switch (req->cmd->identify.cns) {
@ -659,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
@ -748,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@ -820,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
return;
switch (cdw10 & 0xff) {
@ -887,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
mutex_lock(&ctrl->lock);
@ -906,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
pr_debug("ctrl %d update keep-alive timer for %d secs\n",

View File

@ -248,6 +248,36 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable);
}
/*
 * configfs write handler for the port's param_pi_enable attribute.
 *
 * Parses @page as a boolean and stores it in port->pi_enable.
 *
 * Returns @count on success, -EINVAL when @page is not a valid boolean, or
 * -EACCES when the port is currently enabled (the value is left unchanged).
 */
static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_port *port = to_nvmet_port(item);
	bool pi_enable;
	int err;

	/* Input is validated first, so bad input always yields -EINVAL. */
	err = strtobool(page, &pi_enable);
	if (err)
		return -EINVAL;

	if (!port->enabled) {
		port->pi_enable = pi_enable;
		return count;
	}

	pr_err("Disable port before setting pi_enable value.\n");
	return -EACCES;
}
CONFIGFS_ATTR(nvmet_, param_pi_enable);
#endif
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
@ -324,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
kfree(ns->device_path);
ret = -ENOMEM;
ns->device_path = kstrndup(page, len, GFP_KERNEL);
ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@ -540,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
/*
 * configfs write handler for the namespace's write-only revalidate_size
 * attribute.
 *
 * Writing a true boolean calls nvmet_ns_revalidate() on the namespace while
 * holding the subsystem lock.
 *
 * Returns @count on success, or -EINVAL when @page is not a boolean, parses
 * to false, or the namespace is not enabled.
 */
static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_ns *ns = to_nvmet_ns(item);
	ssize_t ret = count;
	bool trigger;

	/* Only a successfully parsed "true" value triggers a revalidate. */
	if (strtobool(page, &trigger) || !trigger)
		return -EINVAL;

	mutex_lock(&ns->subsys->lock);
	if (ns->enabled) {
		nvmet_ns_revalidate(ns);
	} else {
		pr_err("enable ns before revalidate.\n");
		ret = -EINVAL;
	}
	mutex_unlock(&ns->subsys->lock);

	return ret;
}
CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@ -547,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
&nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@ -960,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
new_model_number = kstrndup(page, len, GFP_KERNEL);
new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
return -ENOMEM;
@ -984,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page)
{
return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support);
}
/*
 * configfs write handler for the subsystem's attr_pi_enable attribute.
 *
 * Parses @page as a boolean and stores it in the subsystem's pi_support
 * flag. Returns @count on success or -EINVAL when @page is not a valid
 * boolean.
 */
static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
		const char *page, size_t count)
{
	bool val;
	int err;

	err = strtobool(page, &val);
	if (err)
		return -EINVAL;

	to_subsys(item)->pi_support = val;
	return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
#endif
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
@ -991,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable,
#endif
NULL,
};
@ -1146,7 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
struct nvmet_type_name_map nvmet_ana_state[] = {
static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@ -1290,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable,
#endif
NULL,
};

View File

@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
struct nvmet_async_event *aen;
struct nvmet_req *req;
while (1) {
mutex_lock(&ctrl->lock);
aen = list_first_entry_or_null(&ctrl->async_events,
struct nvmet_async_event, entry);
if (!aen || !ctrl->nr_async_event_cmds) {
mutex_unlock(&ctrl->lock);
break;
}
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
aen = list_first_entry(&ctrl->async_events,
struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
kfree(aen);
mutex_unlock(&ctrl->lock);
trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, status);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
struct nvmet_req *req;
struct nvmet_async_event *aen, *tmp;
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
mutex_lock(&ctrl->lock);
list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
list_del(&aen->entry);
kfree(aen);
}
mutex_unlock(&ctrl->lock);
}
@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port)
if (!try_module_get(ops->owner))
return -EINVAL;
ret = ops->add_port(port);
if (ret) {
module_put(ops->owner);
return ret;
/*
* If the user requested PI support and the transport isn't pi capable,
* don't enable the port.
*/
if (port->pi_enable && !ops->metadata_support) {
pr_err("T10-PI is not supported by transport type %d\n",
port->disc_addr.trtype);
ret = -EINVAL;
goto out_put;
}
ret = ops->add_port(port);
if (ret)
goto out_put;
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port)
port->enabled = true;
port->tr_ops = ops;
return 0;
out_put:
module_put(ops->owner);
return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
/*
 * Revalidate a namespace through its backend and report a size change.
 *
 * Dispatches to the block-device helper when ns->bdev is set and to the file
 * helper otherwise. If ns->size differs from its value on entry,
 * nvmet_ns_changed() is called for the namespace's subsystem and nsid.
 */
void nvmet_ns_revalidate(struct nvmet_ns *ns)
{
	const loff_t prev_size = ns->size;

	if (!ns->bdev)
		nvmet_file_ns_revalidate(ns);
	else
		nvmet_bdev_ns_revalidate(ns);

	if (ns->size == prev_size)
		return;

	nvmet_ns_changed(ns->subsys, ns->nsid);
}
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_process(ctrl, status);
nvmet_async_events_free(ctrl);
}
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sq = sq;
req->ops = ops;
req->sg = NULL;
req->metadata_sg = NULL;
req->sg_cnt = 0;
req->metadata_sg_cnt = 0;
req->transfer_len = 0;
req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
if (unlikely(data_len != req->transfer_len)) {
if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_data_len);
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
return true;
}
int nvmet_req_alloc_sgl(struct nvmet_req *req)
static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
struct pci_dev *p2p_dev = NULL;
return req->transfer_len - req->metadata_len;
}
if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
if (req->sq->ctrl && req->ns)
p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
req->ns->nsid);
static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
{
req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
nvmet_data_transfer_len(req));
if (!req->sg)
goto out_err;
req->p2p_dev = NULL;
if (req->sq->qid && p2p_dev) {
req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
req->transfer_len);
if (req->sg) {
req->p2p_dev = p2p_dev;
return 0;
}
}
if (req->metadata_len) {
req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
&req->metadata_sg_cnt, req->metadata_len);
if (!req->metadata_sg)
goto out_free_sg;
}
return 0;
out_free_sg:
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
return -ENOMEM;
}
/*
* If no P2P memory was available we fallback to using
* regular memory
*/
static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
return false;
if (req->sq->ctrl && req->sq->qid && req->ns) {
req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
req->ns->nsid);
if (req->p2p_dev)
return true;
}
req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
req->p2p_dev = NULL;
return false;
}
int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
return 0;
req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
&req->sg_cnt);
if (unlikely(!req->sg))
return -ENOMEM;
goto out;
if (req->metadata_len) {
req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
&req->metadata_sg_cnt);
if (unlikely(!req->metadata_sg))
goto out_free;
}
return 0;
out_free:
sgl_free(req->sg);
out:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
void nvmet_req_free_sgl(struct nvmet_req *req)
void nvmet_req_free_sgls(struct nvmet_req *req)
{
if (req->p2p_dev)
if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
else
if (req->metadata_sg)
pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
} else {
sgl_free(req->sg);
if (req->metadata_sg)
sgl_free(req->metadata_sg);
}
req->sg = NULL;
req->metadata_sg = NULL;
req->sg_cnt = 0;
req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref)
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);

View File

@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
if (!nvmet_check_data_len(req, data_len))
if (!nvmet_check_transfer_len(req, data_len))
return;
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
const char model[] = "Linux";
u16 status = 0;
if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {

View File

@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_set.attrib & 1) {
@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_get.attrib & 1) {
@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
pr_info("creating controller %d for subsystem %s for NQN %s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);

View File

@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
/*
 * Derive the namespace's metadata_size and pi_type from the integrity
 * profile of its backing block device.
 *
 * Nothing is touched when the device has no integrity profile. For the
 * t10_pi_type1_crc and t10_pi_type3_crc profiles, metadata_size is set to
 * the profile's tuple size and pi_type to the matching NVME_NS_DPS_PI_TYPE*
 * value. For any other profile, metadata_size is reset to 0 and pi_type is
 * not modified.
 */
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
	struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

	if (!bi)
		return;

	ns->metadata_size = bi->tuple_size;

	if (bi->profile == &t10_pi_type1_crc) {
		ns->pi_type = NVME_NS_DPS_PI_TYPE1;
		return;
	}

	if (bi->profile == &t10_pi_type3_crc) {
		ns->pi_type = NVME_NS_DPS_PI_TYPE3;
		return;
	}

	/* Unsupported metadata type */
	ns->metadata_size = 0;
}
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
ns->pi_type = 0;
ns->metadata_size = 0;
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
nvmet_bdev_ns_enable_integrity(ns);
return 0;
}
@ -147,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio)
bio_put(bio);
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
/*
 * Attach an integrity payload to @bio and fill it with pages pulled from
 * @miter.
 *
 * @req:   request whose namespace block device supplies the integrity profile
 * @bio:   bio that receives the payload; its current sector count determines
 *         how many integrity bytes are needed
 * @miter: scatterlist mapping iterator the metadata pages are taken from; it
 *         is stopped (sg_miter_stop) before returning on the add-page error
 *         path and on success
 *
 * Returns 0 on success, -ENODEV when the block device has no integrity
 * profile, the PTR_ERR() of bio_integrity_alloc() when allocation fails, or
 * -ENOMEM when bio_integrity_add_page() does not add the full length.
 *
 * NOTE(review): the loop also ends, and 0 is returned, if the iterator runs
 * out while resid is still non-zero - confirm callers guarantee enough
 * metadata.
 */
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
{
struct blk_integrity *bi;
struct bio_integrity_payload *bip;
struct block_device *bdev = req->ns->bdev;
int rc;
size_t resid, len;
bi = bdev_get_integrity(bdev);
if (unlikely(!bi)) {
pr_err("Unable to locate bio_integrity\n");
return -ENODEV;
}
/* Vector count is capped at BIO_MAX_PAGES. */
bip = bio_integrity_alloc(bio, GFP_NOIO,
min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
return PTR_ERR(bip);
}
/* Integrity bytes needed for the sectors currently in the bio. */
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
/* virtual start sector must be in integrity interval units */
bip_set_seed(bip, bio->bi_iter.bi_sector >>
(bi->interval_exp - SECTOR_SHIFT));
/* resid counts the integrity bytes still to be backed by pages. */
resid = bip->bip_iter.bi_size;
while (resid > 0 && sg_miter_next(miter)) {
/* Take no more of this iterator chunk than is still needed. */
len = min_t(size_t, miter->length, resid);
rc = bio_integrity_add_page(bio, miter->page, len,
offset_in_page(miter->addr));
if (unlikely(rc != len)) {
pr_err("bio_integrity_add_page() failed; %d\n", rc);
sg_miter_stop(miter);
return -ENOMEM;
}
resid -= len;
/*
 * Only part of the chunk was used: reduce the consumed count by the
 * unused tail - presumably so a later sg_miter_next() resumes there;
 * confirm against the sg_miter API.
 */
if (len < miter->length)
miter->consumed -= miter->length - len;
}
sg_miter_stop(miter);
return 0;
}
#else
/*
 * Stub used when CONFIG_BLK_DEV_INTEGRITY is not set: always returns -EINVAL
 * and touches none of its arguments.
 */
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
{
return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
@ -154,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct scatterlist *sg;
struct blk_plug plug;
sector_t sector;
int op, i;
int op, i, rc;
struct sg_mapping_iter prot_miter;
unsigned int iter_flags;
unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
if (!nvmet_check_transfer_len(req, total_len))
return;
if (!req->sg_cnt) {
@ -168,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op |= REQ_FUA;
iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
@ -191,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_opf = op;
blk_start_plug(&plug);
if (req->metadata_len)
sg_miter_start(&prot_miter, req->metadata_sg,
req->metadata_sg_cnt, iter_flags);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio,
&prot_miter);
if (unlikely(rc)) {
bio_io_error(bio);
return;
}
}
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@ -209,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
if (unlikely(rc)) {
bio_io_error(bio);
return;
}
}
submit_bio(bio);
blk_finish_plug(&plug);
}
@ -217,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
@ -309,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
sector = le64_to_cpu(write_zeroes->slba) <<
@ -336,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;

View File

@ -241,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
return;
if (!req->sg_cnt || !nr_bvec) {
@ -285,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
@ -375,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);

View File

@ -19,6 +19,7 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/t10-pi.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@ -77,6 +78,8 @@ struct nvmet_ns {
int use_p2pmem;
struct pci_dev *p2p_dev;
int pi_type;
int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@ -142,6 +145,7 @@ struct nvmet_port {
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@ -201,6 +205,7 @@ struct nvmet_ctrl {
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
bool pi_support;
};
struct nvmet_subsys_model {
@ -230,6 +235,7 @@ struct nvmet_subsys {
u64 ver;
u64 serial;
char *subsysnqn;
bool pi_support;
struct config_group group;
@ -281,6 +287,7 @@ struct nvmet_fabrics_ops {
unsigned int type;
unsigned int msdbd;
bool has_keyed_sgls : 1;
bool metadata_support : 1;
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@ -302,6 +309,7 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@ -315,8 +323,10 @@ struct nvmet_req {
} f;
};
int sg_cnt;
int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
size_t metadata_len;
struct nvmet_port *port;
@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
int nvmet_req_alloc_sgls(struct nvmet_req *req);
void nvmet_req_free_sgls(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
@ -500,13 +510,22 @@ u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
void nvmet_ns_revalidate(struct nvmet_ns *ns);
static inline u32 nvmet_rw_len(struct nvmet_req *req)
static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return 0;
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
req->ns->metadata_size;
}
static inline u32 nvmet_dsm_len(struct nvmet_req *req)
{
return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
@ -521,4 +540,11 @@ static inline __le16 to0based(u32 a)
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
/*
 * True when the namespace has a non-zero pi_type and its metadata_size
 * equals sizeof(struct t10_pi_tuple). Always false when
 * CONFIG_BLK_DEV_INTEGRITY is not enabled.
 */
static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
{
	if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && ns->pi_type)
		return ns->metadata_size == sizeof(struct t10_pi_tuple);

	return false;
}
#endif /* _NVMET_H */

View File

@ -33,6 +33,7 @@
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
#define NVMET_RDMA_MAX_METADATA_MDTS 5
struct nvmet_rdma_srq;
@ -60,6 +61,7 @@ struct nvmet_rdma_rsp {
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@ -161,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@ -423,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done;
return 0;
out_free_rsp:
@ -532,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
spin_unlock(&queue->rsp_wr_wait_lock);
}
/*
 * Query the signature status of @sig_mr and translate it into an NVMe status
 * code.
 *
 * Returns 0 when no signature failure is flagged, NVME_SC_INVALID_PI when
 * ib_check_mr_status() itself fails, and otherwise the NVMe check error that
 * matches the reported signature error type (guard, reference tag or
 * application tag). A flagged failure with an unrecognised error type is
 * reported as NVME_SC_INVALID_PI so that a signature failure can never be
 * returned as success.
 */
static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;
	u16 status = 0;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret) {
		pr_err("ib_check_mr_status failed, ret %d\n", ret);
		return NVME_SC_INVALID_PI;
	}

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
		switch (mr_status.sig_err.err_type) {
		case IB_SIG_BAD_GUARD:
			status = NVME_SC_GUARD_CHECK;
			break;
		case IB_SIG_BAD_REFTAG:
			status = NVME_SC_REFTAG_CHECK;
			break;
		case IB_SIG_BAD_APPTAG:
			status = NVME_SC_APPTAG_CHECK;
			break;
		default:
			/*
			 * Fail closed: the failure bit is set, so do not let
			 * an unknown error type fall through with status 0.
			 */
			status = NVME_SC_INVALID_PI;
			break;
		}
		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
		       mr_status.sig_err.err_type,
		       mr_status.sig_err.expected,
		       mr_status.sig_err.actual);
	}

	return status;
}
/*
 * Fill one signature domain with T10-DIF parameters taken from the block
 * integrity profile and the read/write command.
 *
 * @bi:      integrity profile; supplies the interval (1 << interval_exp)
 * @cmd:     NVMe command; supplies reftag, apptag and appmask
 * @domain:  signature domain to fill
 * @control: CPU-endian rw.control value; ref_remap is set only when the
 *           PRCHK_REF bit is present
 * @pi_type: namespace PI type; ref_escape is set only for type 3
 *
 * ref_remap and ref_escape are only ever set to true here, never cleared.
 */
static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
		struct nvme_command *cmd, struct ib_sig_domain *domain,
		u16 control, u8 pi_type)
{
	const bool remap_ref = control & NVME_RW_PRINFO_PRCHK_REF;
	const bool is_type3 = pi_type == NVME_NS_DPS_PI_TYPE3;

	domain->sig_type = IB_SIG_TYPE_T10_DIF;
	domain->sig.dif.bg_type = IB_T10DIF_CRC;
	domain->sig.dif.pi_interval = 1 << bi->interval_exp;

	/* Reference tag settings */
	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
	if (remap_ref)
		domain->sig.dif.ref_remap = true;
	if (is_type3)
		domain->sig.dif.ref_escape = true;

	/* Application tag settings */
	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
	domain->sig.dif.app_escape = true;
}
/*
 * Build the signature attributes for a request from the PRINFO bits in the
 * command's rw.control field.
 *
 * @req:       request whose command, namespace PI type and block device
 *             integrity profile are used
 * @sig_attrs: output; zeroed first, then filled
 *
 * Side effects on @req when the PRACT bit is set: the bit is cleared in
 * req->cmd->rw.control and req->transfer_len is increased by
 * req->metadata_len.
 *
 * NOTE(review): the result of bdev_get_integrity() is not NULL-checked here
 * and nvmet_rdma_set_sig_domain() dereferences it - presumably callers only
 * get here for namespaces with an integrity profile; confirm.
 */
static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
struct ib_sig_attrs *sig_attrs)
{
struct nvme_command *cmd = req->cmd;
u16 control = le16_to_cpu(cmd->rw.control);
u8 pi_type = req->ns->pi_type;
struct blk_integrity *bi;
bi = bdev_get_integrity(req->ns->bdev);
memset(sig_attrs, 0, sizeof(*sig_attrs));
if (control & NVME_RW_PRINFO_PRACT) {
/* for WRITE_INSERT/READ_STRIP no wire domain */
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
pi_type);
/* Clear the PRACT bit since HCA will generate/verify the PI */
control &= ~NVME_RW_PRINFO_PRACT;
cmd->rw.control = cpu_to_le16(control);
/* PI is added by the HW */
req->transfer_len += req->metadata_len;
} else {
/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
pi_type);
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
pi_type);
}
/* Translate each PRCHK bit of the command into its check_mask flag. */
if (control & NVME_RW_PRINFO_PRCHK_REF)
sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
if (control & NVME_RW_PRINFO_PRCHK_GUARD)
sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
if (control & NVME_RW_PRINFO_PRCHK_APP)
sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
}
/*
 * Initialise the response's RDMA R/W context for the request's data.
 *
 * Requests without metadata (metadata_len == 0) use rdma_rw_ctx_init() on
 * the data scatterlist only. Requests with metadata use
 * rdma_rw_ctx_signature_init() with both the data and metadata scatterlists
 * and @sig_attrs.
 *
 * Returns whatever the selected rdma_rw_ctx_*init() call returns.
 */
static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
		struct ib_sig_attrs *sig_attrs)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;

	if (!req->metadata_len)
		return rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
				req->sg, req->sg_cnt, 0, addr, key,
				nvmet_data_dir(req));

	return rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
			addr, key, nvmet_data_dir(req));
}
/*
 * Tear down the response's RDMA R/W context, mirroring the choice made in
 * nvmet_rdma_rw_ctx_init(): the signature variant is used when the request
 * carries metadata, the plain variant otherwise.
 */
static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
{
	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
	struct nvmet_req *req = &rsp->req;

	if (!req->metadata_len) {
		rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
				req->sg, req->sg_cnt, nvmet_data_dir(req));
		return;
	}

	rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
			cm_id->port_num, req->sg, req->sg_cnt,
			req->metadata_sg, req->metadata_sg_cnt,
			nvmet_data_dir(req));
}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@ -539,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
if (rsp->n_rdma) {
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
}
if (rsp->n_rdma)
nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
nvmet_req_free_sgl(&rsp->req);
nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@ -598,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_wr.opcode = IB_WR_SEND;
}
if (nvmet_rdma_need_data_out(rsp))
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, NULL, &rsp->send_wr);
else
if (nvmet_rdma_need_data_out(rsp)) {
if (rsp->req.metadata_len)
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, &rsp->write_cqe, NULL);
else
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, NULL, &rsp->send_wr);
} else {
first_wr = &rsp->send_wr;
}
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@ -621,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@ -640,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
rsp->req.execute(&rsp->req);
if (rsp->req.metadata_len)
status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
nvmet_rdma_rw_ctx_destroy(rsp);
if (unlikely(status))
nvmet_req_complete(&rsp->req, status);
else
rsp->req.execute(&rsp->req);
}
/*
 * Completion handler for the work completion whose CQE is embedded in the
 * response as rsp->write_cqe.
 *
 * Does nothing when CONFIG_BLK_DEV_INTEGRITY is not enabled. Otherwise it
 * gives the rsp's n_rdma send-queue credits back to the queue and then:
 *  - on a failed completion: destroys the R/W context, uninitialises the
 *    request and releases the rsp; anything other than a flush error is
 *    also logged and passed to nvmet_rdma_error_comp();
 *  - on success: checks the PI status of the MR, records a non-zero status
 *    in the NVMe completion entry, destroys the R/W context and posts the
 *    response send WR.
 */
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u16 status;
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return;
WARN_ON(rsp->n_rdma <= 0);
/* Return the credits and clear n_rdma before any release path runs. */
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
/* Flush errors are neither logged nor escalated. */
if (wc->status != IB_WC_WR_FLUSH_ERR) {
pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
wc->wr_cqe, ib_wc_status_msg(wc->status),
wc->status);
nvmet_rdma_error_comp(queue);
}
return;
}
/*
* Upon RDMA completion check the signature status
* - if succeeded send good NVMe response
* - if failed send bad NVMe response with appropriate error
*/
/* The MR is read here, before the R/W context is destroyed below. */
status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
if (unlikely(status))
rsp->req.cqe->status = cpu_to_le16(status << 1);
nvmet_rdma_rw_ctx_destroy(rsp);
/* The response send WR is posted by this handler itself. */
if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp);
}
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@ -697,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@ -708,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!rsp->req.transfer_len)
return 0;
ret = nvmet_req_alloc_sgl(&rsp->req);
if (rsp->req.metadata_len)
nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
ret = nvmet_req_alloc_sgls(&rsp->req);
if (unlikely(ret < 0))
goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@ -1108,6 +1290,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
if (queue->port->pi_enable && queue->host_qid)
qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@ -1226,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@ -1252,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@ -1369,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue;
int ret = -EINVAL;
@ -1385,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM;
goto put_device;
}
queue->port = port->nport;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@ -1657,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
if (port->nport->pi_enable &&
!(cm_id->device->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)) {
pr_err("T10-PI is not supported for %pISpcs\n", addr);
ret = -EINVAL;
goto out_destroy_id;
}
port->cm_id = cm_id;
return 0;
@ -1766,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
/*
 * Report the MDTS value for @ctrl: the metadata-specific limit when the
 * controller has pi_support set, the regular limit otherwise.
 */
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
	return ctrl->pi_support ? NVMET_RDMA_MAX_METADATA_MDTS :
				  NVMET_RDMA_MAX_MDTS;
}
@ -1774,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
.has_keyed_sgls = 1,
.metadata_support = 1,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,

View File

@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
/*
 * React to a negative send/receive @status on @queue.
 *
 * -EPIPE and -ECONNRESET only shut the socket down in both directions;
 * every other status is routed to nvmet_tcp_fatal_error().
 */
static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
	switch (status) {
	case -EPIPE:
	case -ECONNRESET:
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
		break;
	default:
		nvmet_tcp_fatal_error(queue);
		break;
	}
}
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
left, MSG_DONTWAIT | MSG_MORE);
left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
if (ret <= 0)
if (unlikely(ret < 0)) {
nvmet_tcp_socket_error(queue, ret);
goto done;
} else if (ret == 0) {
break;
}
(*sends)++;
}
done:
return ret;
}
@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
if (ret <= 0)
if (unlikely(ret < 0)) {
nvmet_tcp_socket_error(queue, ret);
goto done;
} else if (ret == 0) {
break;
}
(*recvs)++;
}
done:
return ret;
}
@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
if (ret > 0) {
if (ret > 0)
pending = true;
} else if (ret < 0) {
if (ret == -EPIPE || ret == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
nvmet_tcp_fatal_error(queue);
else if (ret < 0)
return;
}
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
if (ret > 0) {
/* transmitted message/data */
if (ret > 0)
pending = true;
} else if (ret < 0) {
if (ret == -EPIPE || ret == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
nvmet_tcp_fatal_error(queue);
else if (ret < 0)
return;
}
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);

View File

@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete,
);
/*
 * aer_name() expands an identifier into the { value, "name" } pair used by
 * __print_symbolic() below; it is undefined again right after the event.
 */
#define aer_name(aer) { aer, #aer }
/*
 * Tracepoint nvmet_async_event: records the controller id (ctrl->cntlid)
 * and bits 15:8 of the little-endian @result dword, and prints that value
 * both as a number and as a symbolic name when it matches one of the
 * listed NVME_AER_* constants.
 * NOTE(review): the list mixes NVME_AER_NOTICE_* and plain NVME_AER_*
 * constants against the same extracted byte - confirm their values cannot
 * collide.
 */
TRACE_EVENT(nvmet_async_event,
TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
TP_ARGS(ctrl, result),
TP_STRUCT__entry(
__field(int, ctrl_id)
__field(u32, result)
),
TP_fast_assign(
__entry->ctrl_id = ctrl->cntlid;
/* Only bits 15:8 of the CPU-endian result are stored. */
__entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
),
TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
__entry->ctrl_id, __entry->result,
__print_symbolic(__entry->result,
aer_name(NVME_AER_NOTICE_NS_CHANGED),
aer_name(NVME_AER_NOTICE_ANA),
aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
aer_name(NVME_AER_NOTICE_DISC_CHANGED),
aer_name(NVME_AER_ERROR),
aer_name(NVME_AER_SMART),
aer_name(NVME_AER_CSS),
aer_name(NVME_AER_VS))
)
);
#undef aer_name
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH

View File

@ -895,7 +895,7 @@ __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
"6213 NVMEx LS REQ Abort: Unable to locate req x%p\n",
pnvme_lsreq);
return 1;
return -EINVAL;
}
static int

View File

@ -3207,8 +3207,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
ctxp = cmdwqe->context2;
result = wcqe->parameter;
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
atomic_inc(&tgtp->xmt_ls_abort_cmpl);
if (phba->nvmet_support) {
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
atomic_inc(&tgtp->xmt_ls_abort_cmpl);
}
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n",
@ -3244,7 +3246,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
union lpfc_wqe128 *wqe_abts;
struct lpfc_nodelist *ndlp;
@ -3253,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
"6067 ABTS: sid %x xri x%x/x%x\n",
sid, xri, ctxp->wqeq->sli4_xritag);
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
if (phba->nvmet_support && phba->targetport)
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
ndlp = lpfc_findnode_did(phba->pport, sid);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
atomic_inc(&tgtp->xmt_abort_rsp_error);
if (tgtp)
atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
@ -3538,7 +3542,7 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
unsigned long flags;
int rc;
@ -3555,7 +3559,9 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
ctxp->state = LPFC_NVME_STE_LS_ABORT;
}
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
if (phba->nvmet_support && phba->targetport)
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
if (!ctxp->wqeq) {
/* Issue ABTS for this WQE based on iotag */
ctxp->wqeq = lpfc_sli_get_iocbq(phba);
@ -3582,18 +3588,19 @@ lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
spin_unlock_irqrestore(&phba->hbalock, flags);
if (rc == WQE_SUCCESS) {
atomic_inc(&tgtp->xmt_abort_unsol);
if (tgtp)
atomic_inc(&tgtp->xmt_abort_unsol);
return 0;
}
out:
atomic_inc(&tgtp->xmt_abort_rsp_error);
if (tgtp)
atomic_inc(&tgtp->xmt_abort_rsp_error);
abts_wqeq->context2 = NULL;
abts_wqeq->context3 = NULL;
lpfc_sli_release_iocbq(phba, abts_wqeq);
kfree(ctxp);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6056 Failed to Issue ABTS. Status x%x\n", rc);
return 0;
return 1;
}
/**

View File

@ -2813,7 +2813,7 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
struct lpfc_async_xchg_ctx *axchg = NULL;
char *failwhy = NULL;
uint32_t oxid, sid, did, fctl, size;
int ret;
int ret = 1;
d_buf = piocb->context2;
@ -2897,14 +2897,16 @@ lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
(phba->nvmet_support) ? "T" : "I", ret);
out_fail:
kfree(axchg);
/* recycle receive buffer */
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
/* If start of new exchange, abort it */
if (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX))
lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)))
ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
if (ret)
kfree(axchg);
}
/**

View File

@ -169,8 +169,6 @@ struct disk_part_tbl {
struct disk_events;
struct badblocks;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
@ -179,8 +177,6 @@ struct blk_integrity {
unsigned char tag_size;
};
#endif /* CONFIG_BLK_DEV_INTEGRITY */
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().

View File

@ -420,6 +420,12 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
/*
 * Identify Namespace Metadata Capabilities (MC):
 * single-bit flags occupying bit 0 and bit 1.
 * NOTE(review): the per-bit descriptions below are read off the constant
 * names - confirm against the NVMe base specification.
 */
enum {
/* presumably: metadata may be transferred as part of an extended LBA */
NVME_MC_EXTENDED_LBA = (1 << 0),
/* presumably: metadata may be transferred via a separate metadata pointer */
NVME_MC_METADATA_PTR = (1 << 1),
};
struct nvme_ns_id_desc {
__u8 nidt;
__u8 nidl;
@ -1185,7 +1191,7 @@ struct nvmf_disc_rsp_page_hdr {
__le64 numrec;
__le16 recfmt;
__u8 resv14[1006];
struct nvmf_disc_rsp_page_entry entries[0];
struct nvmf_disc_rsp_page_entry entries[];
};
enum {