Commit 8aa497f0 authored by Changpeng Liu
Browse files

spdk: Add block fill API to ioat driver



For those Crystal Beach DMA channels which support block fill capability,
we add a fill API here that can zero out pages or fill them with a
fixed pattern.

Change-Id: I8a57337702b951c703d494004b111f6d206279fb
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
parent 0ca6afb8
Loading
Loading
Loading
Loading
+63 −12
Original line number Diff line number Diff line
@@ -49,6 +49,11 @@

#define SRC_BUFFER_SIZE (512*1024)

/* Kind of DMA operation that a single test task exercises. */
enum ioat_task_type {
	IOAT_COPY_TYPE = 0,	/* task is submitted through ioat_submit_copy() */
	IOAT_FILL_TYPE = 1,	/* task is submitted through ioat_submit_fill() */
};

struct user_config {
	int queue_depth;
	int time_in_sec;
@@ -67,6 +72,8 @@ static struct user_config g_user_config;
struct thread_entry {
	uint64_t xfer_completed;
	uint64_t xfer_failed;
	uint64_t fill_completed;
	uint64_t fill_failed;
	uint64_t current_queue_depth;
	unsigned lcore_id;
	bool is_draining;
@@ -75,9 +82,11 @@ struct thread_entry {
};

/* State carried by one DMA copy or fill request issued by this test. */
struct ioat_task {
	/* Selects ioat_submit_fill() or ioat_submit_copy() at submission time. */
	enum ioat_task_type type;
	/* Thread that owns the task; ioat_done() updates its counters. */
	struct thread_entry *thread_entry;
	/* Data buffer taken from the thread's data_pool. */
	void *buffer;
	/* Length of the transfer in bytes. */
	int len;
	/* 64-bit pattern passed to the fill and compared against dst on completion. */
	uint64_t fill_pattern;
	/* Copy source (an offset into g_src); assigned for copy tasks. */
	void *src;
	/* Destination of the operation, an offset into buffer. */
	void *dst;
};
@@ -124,14 +133,29 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
	int len;
	int src_offset;
	int dst_offset;

	int num_ddwords;
	uint64_t fill_pattern;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		fill_pattern = rand_r(&seed);
		fill_pattern = fill_pattern << 32 | rand_r(&seed);

		/* ensure that the length of memset block is 8 Bytes aligned */
		num_ddwords = (rand_r(&seed) % SRC_BUFFER_SIZE) / 8;
		len = num_ddwords * 8;
		if (len < 8)
			len = 8;
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);
		ioat_task->fill_pattern = fill_pattern;
	} else {
		src_offset = rand_r(&seed) % SRC_BUFFER_SIZE;
		len = rand_r(&seed) % (SRC_BUFFER_SIZE - src_offset);
		dst_offset = rand_r(&seed) % (SRC_BUFFER_SIZE - len);

		memset(ioat_task->buffer, 0, SRC_BUFFER_SIZE);
	ioat_task->len = len;
		ioat_task->src =  g_src + src_offset;
	}
	ioat_task->len = len;
	ioat_task->dst = ioat_task->buffer + dst_offset;
	ioat_task->thread_entry = thread_entry;
}
@@ -139,14 +163,31 @@ static void prepare_ioat_task(struct thread_entry *thread_entry, struct ioat_tas
static void
ioat_done(void *cb_arg)
{
	uint64_t *value;
	int i, failed = 0;
	struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
	struct thread_entry *thread_entry = ioat_task->thread_entry;

	if (ioat_task->type == IOAT_FILL_TYPE) {
		value = (uint64_t *)ioat_task->dst;
		for (i = 0; i < ioat_task->len / 8; i++) {
			if (*value != ioat_task->fill_pattern) {
				thread_entry->fill_failed++;
				failed = 1;
				break;
			}
			value++;
		}
		if (!failed)
			thread_entry->fill_completed++;
	} else {
		if (memcmp(ioat_task->src, ioat_task->dst, ioat_task->len)) {
			thread_entry->xfer_failed++;
		} else {
			thread_entry->xfer_completed++;
		}
	}

	thread_entry->current_queue_depth--;
	if (thread_entry->is_draining) {
		rte_mempool_put(thread_entry->data_pool, ioat_task->buffer);
@@ -278,6 +319,9 @@ drain_xfers(struct thread_entry *thread_entry)
/*
 * Hand one prepared task to the ioat driver, choosing the fill or copy
 * entry point from the task type, and count it as outstanding on the
 * owning thread.
 *
 * NOTE(review): the return value of the submit call is not inspected, so
 * the queue depth is incremented even when the driver rejects the request.
 */
static void
submit_single_xfer(struct ioat_task *ioat_task)
{
	switch (ioat_task->type) {
	case IOAT_FILL_TYPE:
		ioat_submit_fill(ioat_task, ioat_done, ioat_task->dst,
				 ioat_task->fill_pattern, ioat_task->len);
		break;
	default:
		/* Every other task type is treated as a plain copy. */
		ioat_submit_copy(ioat_task, ioat_done, ioat_task->dst,
				 ioat_task->src, ioat_task->len);
		break;
	}

	ioat_task->thread_entry->current_queue_depth++;
}
@@ -290,6 +334,11 @@ submit_xfers(struct thread_entry *thread_entry, uint64_t queue_depth)
		rte_mempool_get(thread_entry->task_pool, (void **)&ioat_task);
		rte_mempool_get(thread_entry->data_pool, &(ioat_task->buffer));

		ioat_task->type = IOAT_COPY_TYPE;
		if (ioat_get_dma_capabilities() & IOAT_ENGINE_FILL_SUPPORTED) {
			if (queue_depth % 2)
				ioat_task->type = IOAT_FILL_TYPE;
		}
		prepare_ioat_task(thread_entry, ioat_task);
		submit_single_xfer(ioat_task);
	}
@@ -397,10 +446,12 @@ dump_result(struct thread_entry *threads, int len)
	for (i = 0; i < len; i++) {
		struct thread_entry *t = &threads[i];
		total_completed += t->xfer_completed;
		total_completed += t->fill_completed;
		total_failed += t->xfer_failed;
		total_failed += t->fill_failed;
		if (t->xfer_completed || t->xfer_failed)
			printf("lcore = %d, success = %ld, failed = %ld \n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed);
			printf("lcore = %d, copy success = %ld, copy failed = %ld, fill success = %ld, fill failed = %ld \n",
			       t->lcore_id, t->xfer_completed, t->xfer_failed, t->fill_completed, t->fill_failed);
	}
	return total_failed ? 1 : 0;
}
+25 −0
Original line number Diff line number Diff line
@@ -90,6 +90,15 @@ void ioat_unregister_thread(void);
int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
			 void *dst, const void *src, uint64_t nbytes);

/**
 * Submit a DMA engine memory fill request.
 *
 * Before submitting any requests on a thread, the thread must be registered
 * using the \ref ioat_register_thread() function.
 *
 * A request larger than the channel's maximum transfer size is split into
 * several descriptors; the callback is attached to the last one only.
 *
 * \param cb_arg Opaque argument stored alongside cb_fn on the last descriptor.
 * \param cb_fn Callback attached to the last descriptor of the request
 *              (presumably invoked when that descriptor completes).
 * \param dst Virtual address of the memory to fill; it is translated to a
 *            physical address before being programmed into the descriptor.
 * \param fill_pattern 64-bit pattern written into each fill descriptor.
 * \param nbytes Number of bytes to fill.
 *
 * \return nbytes on success. -1 if the calling thread has no channel, the
 *         channel does not support block fill, or no descriptor could be
 *         queued (including when nbytes is 0).
 */
int64_t ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
			 void *dst, uint64_t fill_pattern, uint64_t nbytes);

/**
 * Check for completed requests on the current thread.
 *
@@ -100,4 +109,20 @@ int64_t ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
 */
int ioat_process_events(void);

/**
 * DMA engine capability flags.
 *
 * Each flag occupies its own bit so that several can be combined into the
 * mask returned by ioat_get_dma_capabilities().
 */
enum ioat_dma_capability_flags {
	/** The memory copy is supported */
	IOAT_ENGINE_COPY_SUPPORTED	= 1 << 0,
	/** The memory fill is supported */
	IOAT_ENGINE_FILL_SUPPORTED	= 1 << 1,
};

/**
 * Get the DMA engine capabilities of the channel used by the calling thread.
 *
 * Before calling this function on a thread, the thread must be registered
 * using the \ref ioat_register_thread() function.
 *
 * \return A bitmask of \ref ioat_dma_capability_flags values, or 0 if the
 *         calling thread has no channel.
 */
uint32_t ioat_get_dma_capabilities(void);

#endif
+7 −0
Original line number Diff line number Diff line
@@ -53,6 +53,13 @@
#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
#define IOAT_CHANCTRL_INT_REARM			0x0001

/*
 * DMA Channel Capabilities: bit masks for the channel's dmacapability
 * register. Written as hex literals like the IOAT_CHANCTRL_* masks.
 */
#define	IOAT_DMACAP_PB			0x0001	/* bit 0 */
#define	IOAT_DMACAP_DCA			0x0010	/* bit 4 */
#define	IOAT_DMACAP_BFILL		0x0040	/* bit 6: block fill */
#define	IOAT_DMACAP_XOR			0x0100	/* bit 8 */
#define	IOAT_DMACAP_PQ			0x0200	/* bit 9 */
#define	IOAT_DMACAP_DMA_DIF		0x0400	/* bit 10 */

struct ioat_registers {
	uint8_t		chancnt;
+104 −0
Original line number Diff line number Diff line
@@ -295,6 +295,37 @@ ioat_prep_copy(struct ioat_channel *ioat, uint64_t dst,
	return desc;
}

/*
 * Prepare a single block-fill descriptor in the ring slot at the current
 * head and pass it to ioat_submit_single().
 *
 * dst is the address programmed into the hardware descriptor, fill_pattern
 * the 64-bit value to write, and len the byte count, which must not exceed
 * the channel's max_xfer_size.
 *
 * Returns the software descriptor with its callback cleared, so the caller
 * can attach a completion callback, or NULL when the ring has no free entry.
 */
static struct ioat_descriptor *
ioat_prep_fill(struct ioat_channel *ioat, uint64_t dst,
	       uint64_t fill_pattern, uint32_t len)
{
	struct ioat_descriptor *sw_desc;
	union ioat_hw_descriptor *hw;

	ioat_assert(len <= ioat->max_xfer_size);

	if (ioat_get_ring_space(ioat) < 1) {
		/* Ring is full; nothing was touched. */
		return NULL;
	}

	ioat_get_ring_entry(ioat, ioat->head, &sw_desc, &hw);

	/* No completion callback until the caller installs one. */
	sw_desc->callback_fn = NULL;
	sw_desc->callback_arg = NULL;

	/* Clear the whole control word before setting individual fields. */
	hw->fill.u.control_raw = 0;
	hw->fill.u.control.op = IOAT_OP_FILL;
	hw->fill.u.control.completion_update = 1;

	hw->fill.dest_addr = dst;
	hw->fill.src_data = fill_pattern;
	hw->fill.size = len;

	ioat_submit_single(ioat);

	return sw_desc;
}

static int ioat_reset_hw(struct ioat_channel *ioat)
{
	int timeout;
@@ -419,6 +450,10 @@ ioat_channel_start(struct ioat_channel *ioat)
		return -1;
	}

	/* Always support DMA copy */
	ioat->dma_capabilities = IOAT_ENGINE_COPY_SUPPORTED;
	if (ioat->regs->dmacapability & IOAT_DMACAP_BFILL)
		ioat->dma_capabilities |= IOAT_ENGINE_FILL_SUPPORTED;
	xfercap = ioat->regs->xfercap;

	/* Only bits [4:0] are valid. */
@@ -663,6 +698,75 @@ ioat_submit_copy(void *cb_arg, ioat_callback_t cb_fn,
	return nbytes;
}

/*
 * Fill nbytes at dst with the repeating 64-bit fill_pattern using the
 * calling thread's channel.
 *
 * The request is cut into pieces of at most max_xfer_size bytes, one
 * descriptor each. cb_fn/cb_arg are attached to the final descriptor only.
 *
 * Returns nbytes on success. Returns -1 when the thread has no channel, the
 * channel lacks fill support, or a descriptor could not be obtained; in the
 * last case the ring head is put back to where it was on entry.
 */
int64_t
ioat_submit_fill(void *cb_arg, ioat_callback_t cb_fn,
		 void *dst, uint64_t fill_pattern, uint64_t nbytes)
{
	struct ioat_channel	*chan = ioat_thread_channel;
	struct ioat_descriptor	*tail_desc = NULL;
	uint64_t	bytes_left, chunk;
	uint64_t	cursor;
	uint32_t	saved_head;

	if (chan == NULL) {
		return -1;
	}

	if ((chan->dma_capabilities & IOAT_ENGINE_FILL_SUPPORTED) == 0) {
		ioat_printf(chan, "Channel does not support memory fill\n");
		return -1;
	}

	/* Remember the head so a partially built request can be rolled back. */
	saved_head = chan->head;

	cursor = (uint64_t)dst;
	bytes_left = nbytes;

	while (bytes_left != 0) {
		chunk = bytes_left;
		chunk = min(chunk, chan->max_xfer_size);

		tail_desc = ioat_prep_fill(chan,
					   ioat_vtophys((void *)cursor),
					   fill_pattern,
					   chunk);
		if (tail_desc == NULL) {
			break;
		}

		bytes_left -= chunk;
		cursor += chunk;
	}

	if (tail_desc == NULL) {
		/*
		 * Ran out of descriptors in the ring - reset head to leave things as they were
		 * in case we managed to fill out any descriptors.
		 */
		chan->head = saved_head;
		return -1;
	}

	/* Only the last descriptor of the request reports completion. */
	tail_desc->callback_fn = cb_fn;
	tail_desc->callback_arg = cb_arg;

	ioat_flush(chan);
	return nbytes;
}

/*
 * Report the capability mask of the calling thread's channel, or 0 when
 * the thread has no channel.
 */
uint32_t ioat_get_dma_capabilities(void)
{
	struct ioat_channel	*chan = ioat_thread_channel;

	return chan ? chan->dma_capabilities : 0;
}

int ioat_process_events(void)
{
	if (!ioat_thread_channel) {
+1 −0
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ struct ioat_channel {
	struct ioat_descriptor		*ring;
	union ioat_hw_descriptor	*hw_ring;
	uint64_t			hw_ring_phys_addr;
	uint32_t			dma_capabilities;
};

static inline uint32_t