Commit fed358a0 authored by Richael Zhuang, committed by Tomasz Zawadzki
Browse files

util: fix misaligned load for uint64_t type



The following error was reported when running gpt_ut which is related
to crc32_update().
"load of misaligned address 0x001ffeff78cc for type 'const uint64_t',
which requires 8 byte alignment".

This patch first processes the leading bytes one at a time so that the buf
address passed to __crc32d or __crc32cd is 8-byte aligned, and finally
processes the trailing bytes.

In spdk_crc32c_update() in crc32c.c, memcpy() was used to avoid the
misaligned-load problem. Update it to use the solution above to reduce the
extra overhead.

Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Change-Id: I7c7aaa41e1c042a96668158818b06729fb3ceec6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16801


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 2d590e74
Loading
Loading
Loading
Loading
+17 −6
Original line number Diff line number Diff line
@@ -31,22 +31,33 @@ crc32_table_init(struct spdk_crc32_table *table, uint32_t polynomial_reflect)
/*
 * Fold len bytes starting at buf into the running CRC value crc, using the
 * __crc32b (one byte) and __crc32d (eight bytes) intrinsics, and return the
 * updated value.
 *
 * The buffer is consumed in three phases so that every 64-bit load is made
 * from an 8-byte aligned address:
 *   1. leading bytes, one at a time, up to the next 8-byte boundary,
 *   2. aligned 64-bit words,
 *   3. trailing bytes, one at a time.
 *
 * table is not referenced by this implementation.
 * buf may have any alignment; len may be any value, including 0.
 */
uint32_t
crc32_update(const struct spdk_crc32_table *table, const void *buf, size_t len, uint32_t crc)
{
	const uint8_t *byte_buf = buf;
	size_t count_pre, count_post, count_mid;

	/* Number of bytes needed to reach the next 8-byte boundary (0 if already
	 * aligned). For a short, unaligned buffer that boundary can lie beyond the
	 * end of the data, so clamp to len; otherwise the head loop would read
	 * past the buffer and the subtraction below would wrap around.
	 */
	count_pre = (8 - ((uintptr_t)byte_buf & 7)) & 7;
	if (count_pre > len) {
		count_pre = len;
	}

	/* Whatever follows the head splits into whole 64-bit words plus a tail. */
	count_mid = (len - count_pre) / 8;
	count_post = (len - count_pre) & 7;

	while (count_pre--) {
		crc = __crc32b(crc, *byte_buf);
		byte_buf++;
	}

	/* byte_buf is 8-byte aligned whenever count_mid is non-zero. The cast is
	 * done inside the loop so a misaligned pointer is never converted to
	 * uint64_t * when there are no words to process.
	 */
	while (count_mid--) {
		crc = __crc32d(crc, *(const uint64_t *)byte_buf);
		byte_buf += sizeof(uint64_t);
	}

	while (count_post--) {
		crc = __crc32b(crc, *byte_buf);
		byte_buf++;
	}

	return crc;
}

+40 −26
Original line number Diff line number Diff line
@@ -20,30 +20,34 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
uint32_t
spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
{
	size_t count_pre, count_post, count_mid;
	const uint64_t *dword_buf;
	uint64_t crc_tmp64;
	size_t count;

	/* Process the head and tail bytes separately so that the buf address
	 * passed to _mm_crc32_u64 is 8-byte aligned. This avoids unaligned loads.
	 */
	count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7);
	count_post = (uint64_t)(buf + len) & 7;
	count_mid = (len - count_pre - count_post) / 8;

	while (count_pre--) {
		crc = _mm_crc32_u8(crc, *(const uint8_t *)buf);
		buf++;
	}

	/* _mm_crc32_u64() needs a 64-bit intermediate value */
	crc_tmp64 = crc;
	dword_buf = (const uint64_t *)buf;

	/* Process as much of the buffer as possible in 64-bit blocks. */
	count = len / 8;
	while (count--) {
		uint64_t block;

		/*
		 * Use memcpy() to avoid unaligned loads, which are undefined behavior in C.
		 * The compiler will optimize out the memcpy() in release builds.
		 */
		memcpy(&block, buf, sizeof(block));
		crc_tmp64 = _mm_crc32_u64(crc_tmp64, block);
		buf += sizeof(block);
	while (count_mid--) {
		crc_tmp64 = _mm_crc32_u64(crc_tmp64, *dword_buf);
		dword_buf++;
	}
	crc = (uint32_t)crc_tmp64;

	/* Handle any trailing bytes. */
	count = len & 7;
	while (count--) {
	buf = dword_buf;
	crc = (uint32_t)crc_tmp64;
	while (count_post--) {
		crc = _mm_crc32_u8(crc, *(const uint8_t *)buf);
		buf++;
	}
@@ -56,19 +60,29 @@ spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
uint32_t
spdk_crc32c_update(const void *buf, size_t len, uint32_t crc)
{
	size_t count;
	size_t count_pre, count_post, count_mid;
	const uint64_t *dword_buf;

	/* Process the head and tail bytes separately so that the buf address
	 * passed to crc32_cd is 8-byte aligned. This avoids unaligned loads.
	 */
	count_pre = ((uint64_t)buf & 7) == 0 ? 0 : 8 - ((uint64_t)buf & 7);
	count_post = (uint64_t)(buf + len) & 7;
	count_mid = (len - count_pre - count_post) / 8;

	count = len / 8;
	while (count--) {
		uint64_t block;
	while (count_pre--) {
		crc = __crc32cb(crc, *(const uint8_t *)buf);
		buf++;
	}

		memcpy(&block, buf, sizeof(block));
		crc = __crc32cd(crc, block);
		buf += sizeof(block);
	dword_buf = (const uint64_t *)buf;
	while (count_mid--) {
		crc = __crc32cd(crc, *dword_buf);
		dword_buf++;
	}

	count = len & 7;
	while (count--) {
	buf = dword_buf;
	while (count_post--) {
		crc = __crc32cb(crc, *(const uint8_t *)buf);
		buf++;
	}