Commit 4ad4c76c authored by Jin Yu, committed by Tomasz Zawadzki
Browse files

vhost-blk: resubmit inflight descs of packed ring



This patch adds live-recovery support for the packed ring.
After a reconnection, the inflight descriptors must be resubmitted.

Change-Id: I133bf5f1c09029d3c693c0fef67a609d72f2bf69
Signed-off-by: Jin Yu <jin.yu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4127


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Community-CI: Mellanox Build Bot
parent ec2e6e2b
Loading
Loading
Loading
Loading
+68 −5
Original line number Diff line number Diff line
@@ -251,6 +251,12 @@ vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
	return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
}

/* Tell whether VRING_DESC_F_INDIRECT is set in the inflight descriptor's flags. */
static bool
vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
{
	if (cur_desc->flags & VRING_DESC_F_INDIRECT) {
		return true;
	}

	return false;
}

int
vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		  uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
@@ -280,6 +286,22 @@ vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtque
	return 0;
}

/*
 * Turn the (addr, len) pair of an indirect descriptor into a packed
 * descriptor table.
 *
 * *desc_table_size receives the number of whole struct vring_packed_desc
 * entries that fit in len. *desc_table receives whatever vhost_gpa_to_vva()
 * returns for (addr, len).
 *
 * Returns true when the translation produced a non-NULL pointer and false
 * otherwise; on failure *desc_table is left NULL.
 */
static bool
vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
		uint64_t addr, uint32_t len,
		struct vring_packed_desc **desc_table,
		uint32_t *desc_table_size)
{
	struct vring_packed_desc *table;

	*desc_table_size = len / sizeof(struct vring_packed_desc);

	table = vhost_gpa_to_vva(vsession, addr, len);
	*desc_table = table;

	return table != NULL;
}

int
vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
			 struct spdk_vhost_virtqueue *virtqueue,
@@ -294,14 +316,41 @@ vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
	 * different from split ring.
	 */
	if (vhost_vring_packed_desc_is_indirect(*desc)) {
		*desc_table_size = (*desc)->len / sizeof(struct vring_packed_desc);
		*desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
					       (*desc)->len);
		if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
				desc_table, desc_table_size)) {
			return -1;
		}

		*desc = *desc_table;
		if (spdk_unlikely(*desc == NULL)) {
	} else {
		*desc_table = NULL;
		*desc_table_size  = 0;
	}

	return 0;
}

int
vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
			      spdk_vhost_inflight_desc *desc_array,
			      uint16_t req_idx, spdk_vhost_inflight_desc **desc,
			      struct vring_packed_desc  **desc_table, uint32_t *desc_table_size)
{
	*desc = &desc_array[req_idx];

	if (vhost_inflight_packed_desc_is_indirect(*desc)) {
		if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
				desc_table, desc_table_size)) {
			return -1;
		}

		/* *desc refers to an inflight descriptor, not a packed descriptor.
		 * When F_INDIRECT is set, the entries to walk are the packed
		 * descriptors of the indirect table, so set the inflight desc to NULL.
		 */
		*desc = NULL;
	} else {
		/* When not set the F_INDIRECT means there is no packed desc table */
		*desc_table = NULL;
		*desc_table_size = 0;
	}
@@ -624,6 +673,12 @@ vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}

/* Tell whether VRING_DESC_F_WRITE is set in the inflight descriptor's flags. */
bool
vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
{
	if (cur_desc->flags & VRING_DESC_F_WRITE) {
		return true;
	}

	return false;
}

int
vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
				 struct spdk_vhost_virtqueue *vq,
@@ -695,6 +750,14 @@ vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec
					       desc->addr, desc->len);
}

/*
 * Inflight-descriptor counterpart of the packed-descriptor iov helper:
 * hands the descriptor's addr and len to vhost_vring_desc_payload_to_iov()
 * and returns that helper's result unchanged.
 */
int
vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
				 uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
{
	int rc;

	rc = vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
					     desc->addr, desc->len);

	return rc;
}

/* 1, Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
 * 2, Update the vq->last_avail_idx to point next available desc chain.
 * 3, Update the avail_wrap_counter if last_avail_idx overturn.
+160 −12
Original line number Diff line number Diff line
@@ -245,27 +245,110 @@ blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
	return 0;
}

/*
 * Walk the packed descriptors of one request and fill iovs from them.
 *
 * When desc_table is NULL the walk starts at vq->vring.desc_packed[req_idx];
 * otherwise it starts at entry 0 of desc_table (indirect request).
 *
 * On entry *iovs_cnt is the capacity of iovs; on success it is overwritten
 * with the number of iovs used and *length with the sum of the descriptor
 * lengths. Returns 0 on success and -EINVAL when the iov capacity is
 * reached, a descriptor cannot be converted, fewer than two iovs were
 * produced, or no descriptor is writable.
 *
 * NOTE(review): desc_table_size is uint16_t here while the callers in this
 * file hold it as uint32_t - confirm the narrowing is intended.
 */
static int
blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
			   struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			   struct vring_packed_desc *desc_table, uint16_t desc_table_size,
			   struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct vring_packed_desc *cur;
	uint16_t iov_used = 0, writable = 0;
	uint32_t total_len = 0;

	if (desc_table != NULL) {
		/* Indirect request: the chain lives in the table, starting at 0. */
		req_idx = 0;
		cur = desc_table;
	} else {
		cur = &vq->vring.desc_packed[req_idx];
	}

	do {
		/*
		 * Running out of iov slots cannot happen for a well-formed
		 * request; treat it as an invalid request.
		 */
		if (spdk_unlikely(iov_used == *iovs_cnt)) {
			SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx);
			return -EINVAL;
		}

		if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &iov_used, cur))) {
			SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx, iov_used);
			return -EINVAL;
		}

		total_len += cur->len;
		if (vhost_vring_packed_desc_is_wr(cur)) {
			writable++;
		}

		/* The helper sets cur to NULL once the last desc has been consumed. */
		vhost_vring_packed_desc_get_next(&cur, &req_idx, vq, desc_table, desc_table_size);
	} while (cur != NULL);

	/*
	 * A valid request has at least two descriptors: the first holds the
	 * request and must be readable, the last holds the response buffer
	 * and must be writable.
	 */
	if (spdk_unlikely(writable == 0 || iov_used < 2)) {
		return -EINVAL;
	}

	*iovs_cnt = iov_used;
	*length = total_len;

	return 0;
}

/*
 * Build the iov list for the packed-ring request that starts at req_idx.
 *
 * Looks the descriptor up with vhost_vq_get_desc_packed(), which also
 * reports an indirect descriptor table when there is one, and then lets
 * blk_iovs_packed_desc_setup() walk the chain and fill iovs, *iovs_cnt
 * and *length.
 *
 * Returns the non-zero code from vhost_vq_get_desc_packed() when the lookup
 * fails, otherwise the result of blk_iovs_packed_desc_setup().
 *
 * NOTE(review): the vq/req_idx/iovs parameters appear twice below with
 * different line wrapping - confirm only one copy is meant to remain.
 */
static int
blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
			    struct spdk_vhost_virtqueue *vq,
			    uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
			    struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			    struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct vring_packed_desc *desc = NULL, *desc_table;
	uint32_t desc_table_size;
	int rc;

	/* desc is only needed as an output slot; the walk restarts from req_idx/desc_table. */
	rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
				      &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
		return rc;
	}

	return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
					  iovs, iovs_cnt, length);
}

static int
blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
			      struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			      struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_dev *vdev = vsession->vdev;
	spdk_vhost_inflight_desc *inflight_desc;
	struct vring_packed_desc *desc_table;
	uint16_t out_cnt = 0, cnt = 0;
	uint32_t desc_table_size, len = 0;
	int rc = 0;

	rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
				      &desc_table, &desc_table_size);
	rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
					   req_idx, &inflight_desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
		return rc;
	}

	if (desc_table != NULL) {
		req_idx = 0;
		return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
						  iovs, iovs_cnt, length);
	}

	while (1) {
@@ -279,20 +362,21 @@ blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
			return -EINVAL;
		}

		if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
		if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
			SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx, cnt);
			return -EINVAL;
		}

		len += desc->len;
		out_cnt += vhost_vring_packed_desc_is_wr(desc);
		len += inflight_desc->len;
		out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);

		/* desc is NULL means we reach the last desc of this request */
		vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
		if (desc == NULL) {
		/* Without F_NEXT means it's the last desc */
		if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
			break;
		}

		inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
	}

	/*
@@ -639,6 +723,64 @@ process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
	}
}

/*
 * Resubmit one packed-ring request recorded in the inflight region.
 *
 * req_idx is the index of the request's head entry in the inflight
 * descriptor array. The task slot is selected by the id stored in the
 * head's "last" entry. last_avail_idx is advanced by the head's "num"
 * (wrapping at the ring size and flipping avail_phase) before the task is
 * initialized, its iovs are rebuilt from the inflight descriptors and it is
 * passed to process_blk_request().
 */
static void
process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
				 uint16_t req_idx)
{
	spdk_vhost_inflight_desc *inflight = vq->vring_inflight.inflight_packed->desc;
	spdk_vhost_inflight_desc *head = &inflight[req_idx];
	uint16_t task_idx = inflight[head->last].id;
	uint16_t num_descs = head->num;
	struct spdk_vhost_blk_task *task;

	/*
	 * After a packed-ring reconnection last_used_idx is the starting
	 * point, so the available index still has to be moved past every
	 * inflight request that gets processed here.
	 */
	vq->last_avail_idx += num_descs;
	if (vq->last_avail_idx >= vq->vring.size) {
		vq->last_avail_idx -= vq->vring.size;
		vq->packed.avail_phase = !vq->packed.avail_phase;
	}

	task = (struct spdk_vhost_blk_task *)vq->tasks + task_idx;
	if (spdk_unlikely(task->used)) {
		SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
			    task->bvsession->vsession.name, task_idx);
		task->used_len = 0;
		blk_task_enqueue(task);
		return;
	}

	task->req_idx = req_idx;
	task->num_descs = num_descs;
	task->buffer_id = task_idx;
	/* Remembered so the inflight entries can be cleaned up later. */
	task->inflight_head = req_idx;

	task->bvsession->vsession.task_cnt++;

	blk_task_init(task);

	if (blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, task->iovs,
					  &task->iovcnt, &task->payload_size)) {
		SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
		/* The request could not be parsed; complete it as unsupported. */
		invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP);
		return;
	}

	if (process_blk_request(task, task->bvsession) != 0) {
		SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
		return;
	}

	SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
		      task_idx);
}

static void
submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
		     struct spdk_vhost_virtqueue *vq)
@@ -665,8 +807,12 @@ submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
			continue;
		}

		if (vq->packed.packed_ring) {
			process_packed_inflight_blk_task(vq, req_idx);
		} else {
			process_blk_task(vq, req_idx);
		}
	}

	free(resubmit_list);
	resubmit->resubmit_list = NULL;
@@ -708,6 +854,8 @@ process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_vi
{
	uint16_t i = 0;

	submit_inflight_desc(bvsession, vq);

	while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
	       vhost_vq_packed_ring_is_avail(vq)) {
		SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
+11 −0
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@

/* Short spdk_vhost_* aliases for the rte_vhost resubmit/inflight structures. */
typedef struct rte_vhost_resubmit_desc spdk_vhost_resubmit_desc;
typedef struct rte_vhost_resubmit_info spdk_vhost_resubmit_info;
/* Entry type of the packed-ring inflight descriptor array. */
typedef struct rte_vhost_inflight_desc_packed	spdk_vhost_inflight_desc;

struct spdk_vhost_virtqueue {
	struct rte_vhost_vring vring;
@@ -287,6 +288,11 @@ int vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
			     uint16_t req_idx, struct vring_packed_desc **desc,
			     struct vring_packed_desc **desc_table, uint32_t *desc_table_size);

/**
 * Look up the inflight descriptor at \c req_idx and, if it is flagged
 * indirect, resolve the packed descriptor table it refers to.
 *
 * \param vsession vhost session
 * \param desc_array inflight descriptor array to index
 * \param req_idx index of the wanted entry in \c desc_array
 * \param desc set to the entry at \c req_idx, or to NULL when that entry
 * is indirect
 * \param desc_table set to the resolved packed descriptor table for an
 * indirect entry, NULL otherwise
 * \param desc_table_size set to the number of entries in \c desc_table,
 * 0 when there is no table
 * \return -1 if the indirect table cannot be resolved; callers treat 0 as
 * success
 */
int vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
				  spdk_vhost_inflight_desc *desc_array,
				  uint16_t req_idx, spdk_vhost_inflight_desc **desc,
				  struct vring_packed_desc  **desc_table, uint32_t *desc_table_size);

/**
 * Send IRQ/call client (if pending) for \c vq.
 * \param vsession vhost session
@@ -379,6 +385,11 @@ bool vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc);
int vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
				   uint16_t *iov_index, const struct vring_packed_desc *desc);

/**
 * Check whether an inflight descriptor has VRING_DESC_F_WRITE set.
 *
 * \param cur_desc inflight descriptor to inspect
 * \return true if the write flag is set, false otherwise
 */
bool vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc);

/**
 * Add the payload described by an inflight descriptor (its addr and len)
 * to an iovec array.
 *
 * \param vsession vhost session
 * \param iov iovec array to fill
 * \param iov_index index into \c iov, passed through to the payload helper
 * \param desc inflight descriptor supplying addr and len
 * \return result of the payload-to-iov helper; callers treat non-zero as
 * an invalid descriptor
 */
int vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
				     uint16_t *iov_index, const spdk_vhost_inflight_desc *desc);

uint16_t vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
		uint16_t *num_descs);