Skip to content

Commit

Permalink
Fixing more overflows
Browse files: browse the repository at this point in the history
  • Loading branch information
lemire committed Sep 26, 2018
1 parent 1ad101c commit dde3e49
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 79 deletions.
67 changes: 51 additions & 16 deletions src/simdbitpacking.c
Original file line number Diff line number Diff line change
Expand Up @@ -14261,28 +14261,63 @@ const __m128i *simdunpack_shortlength(const __m128i *in, int length,
inwordpointer = 0;
P = _mm_loadu_si128((__m128i *)in);
++in;
for (k = 0; k < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
if (length % 4 == 0) {

for (k = 0; k + 1 < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, answer);
out += 4;
}
if (k < length / 4) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else if (bit == firstpass) {
inwordpointer = 0;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, answer);
out += 4;
}

} else {
for (k = 0; k < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, answer);
out += 4;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, answer);
out += 4;
}
if (length % 4 != 0) {
uint32_t buffer[4];
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else if(bit == firstpass) {
} else if (bit == firstpass) {
inwordpointer = 0;
} else {
P = _mm_loadu_si128((__m128i *)in);
Expand Down
69 changes: 52 additions & 17 deletions src/simdfor.c
Original file line number Diff line number Diff line change
Expand Up @@ -15076,7 +15076,7 @@ __m128i *simdpackFOR_length(uint32_t initvalue, const uint32_t *in, int length,
buffer[k] = in[length / 4 * 4 + k];
}
for (k = (length % 4); k < 4; ++k) {
buffer[k] = 0;
buffer[k] = initvalue;
}
value = _mm_sub_epi32(_mm_loadu_si128((__m128i *)buffer), offset);
P = _mm_or_si128(P, _mm_slli_epi32(value, inwordpointer));
Expand Down Expand Up @@ -15120,28 +15120,63 @@ const __m128i *simdunpackFOR_length(uint32_t initvalue, const __m128i *in,
inwordpointer = 0;
P = _mm_loadu_si128((__m128i *)in);
++in;
for (k = 0; k < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
if (length % 4 == 0) {

for (k = 0; k + 1 < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
out += 4;
}
if (k < length / 4) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else if (bit == firstpass) {
inwordpointer = 0;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
out += 4;
}

} else {
for (k = 0; k < length / 4; ++k) {
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else {
P = _mm_loadu_si128((__m128i *)in);
++in;
answer = _mm_or_si128(_mm_slli_epi32(P, firstpass), answer);
inwordpointer = bit - firstpass;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
out += 4;
}
answer = _mm_and_si128(maskbits, answer);
_mm_storeu_si128((__m128i *)out, _mm_add_epi32(answer, offset));
out += 4;
}
if (length % 4 != 0) {
uint32_t buffer[4];
__m128i answer = _mm_srli_epi32(P, inwordpointer);
const uint32_t firstpass = sizeof(uint32_t) * 8 - inwordpointer;
if (bit < firstpass) {
inwordpointer += bit;
} else if(bit == firstpass) {
} else if (bit == firstpass) {
inwordpointer = 0;
} else {
P = _mm_loadu_si128((__m128i *)in);
Expand Down
113 changes: 67 additions & 46 deletions tests/unit.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,57 +7,78 @@
#include <stdlib.h>

int issue21() {
size_t sz = 110;
size_t i;
uint32_t *in = malloc(sz * sizeof(uint32_t));
uint32_t *out = malloc(sz * sizeof(uint32_t));
for (i = 0; i < sz; ++i)
in[i] = 255;
uint32_t b = maxbits_length(in, sz);
uint8_t *buf = malloc(simdpack_compressedbytes(sz, b));
__m128i *end = simdpack_length(in, sz, (__m128i *)buf, b);
if((uint8_t *)end - buf != simdpack_compressedbytes(sz, b)) {
printf("bad mem usage\n");
return -1;
}
simdunpack_length((const __m128i *)buf, sz, out, b);
for (i = 0; i < sz; ++i) {
if (in[i] != out[i]) {
printf("bug\n");
return -1;
printf("issue21");
fflush(stdout);
for (uint32_t bw = 0; bw < 30; bw++) {
printf(".");
fflush(stdout);
for (size_t sz = 1; sz < 4096; sz++) {

size_t i;
uint32_t *in = malloc(sz * sizeof(uint32_t));
uint32_t *out = malloc(sz * sizeof(uint32_t));
for (i = 0; i < sz; ++i)
in[i] = (1 << bw) - 1;
uint32_t b = maxbits_length(in, sz);
uint8_t *buf = malloc(simdpack_compressedbytes(sz, b));
__m128i *end = simdpack_length(in, sz, (__m128i *)buf, b);
if ((uint8_t *)end - buf != simdpack_compressedbytes(sz, b)) {
printf("bad mem usage\n");
return -1;
}
simdunpack_length((const __m128i *)buf, sz, out, b);
for (i = 0; i < sz; ++i) {
if (in[i] != out[i]) {
printf("bug\n");
return -1;
}
}
free(in);
free(out);
free(buf);
}
}
free(in);
free(out);
free(buf);
printf("\n");
return 0;
}

int issue21FOR() {
size_t i;
size_t sz = 110;
uint32_t *in = malloc(sz * sizeof(uint32_t));
uint32_t *out = malloc(sz * sizeof(uint32_t));
in[0] = 0;
for (i = 1; i < sz; ++i)
in[i] = 255;
uint32_t b = maxbits_length(in, sz);
uint8_t *buf = malloc(simdpackFOR_compressedbytes(sz, b));
__m128i *end = simdpackFOR_length(0, in, sz, (__m128i *)buf, b);
if((uint8_t *)end - buf != simdpackFOR_compressedbytes(sz, b)) {
printf("bad mem usage\n");
return -1;
}
simdunpackFOR_length(0, (const __m128i *)buf, sz, out, b);
for (i = 0; i < sz; ++i) {
if (in[i] != out[i]) {
printf("bug\n");
return -1;
size_t i, j;
printf("issue21for");
fflush(stdout);
for (uint32_t bw = 0; bw < 30; bw++) {
printf(".");
fflush(stdout);
for (size_t sz = 1; sz < 4096; sz++) {

uint32_t *in = malloc(sz * sizeof(uint32_t));
uint32_t *out = malloc(sz * sizeof(uint32_t));
in[0] = 0;
for (i = 1; i < sz; ++i)
in[i] = (1 << bw) - 1;
uint32_t b = maxbits_length(in, sz);
uint8_t *buf = malloc(simdpackFOR_compressedbytes(sz, b));
__m128i *end = simdpackFOR_length(0, in, sz, (__m128i *)buf, b);
if ((uint8_t *)end - buf != simdpackFOR_compressedbytes(sz, b)) {
printf("bad mem usage\n");
return -1;
}
simdunpackFOR_length(0, (const __m128i *)buf, sz, out, b);
for (i = 0; i < sz; ++i) {
if (in[i] != out[i]) {
for (j = 0; j < sz; ++j) {
printf("%zu : %u %u \n", j, in[j], out[j]);
}
printf("bug\n");
return -1;
}
}
free(in);
free(out);
free(buf);
}
}
free(in);
free(out);
free(buf);
printf("\n");
return 0;
}

Expand Down Expand Up @@ -404,7 +425,7 @@ int testavx2() {
int k;
printf(" gap = %u \n", gap);
for (k = 0; k < N; ++k)
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
for (k = 0; k * AVXBlockSize < N; ++k) {
/*
First part works for general arrays (sorted or unsorted)
Expand Down Expand Up @@ -567,7 +588,7 @@ int test() {
int k;
printf(" gap = %u \n", gap);
for (k = 0; k < N; ++k)
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
for (k = 0; k * SIMDBlockSize < N; ++k) {
/*
First part works for general arrays (sorted or unsorted)
Expand Down Expand Up @@ -630,7 +651,7 @@ int testFOR() {
int k;
printf(" gap = %u \n", gap);
for (k = 0; k < N; ++k)
datain[k] = (uint32_t)(((uint64_t)k * gap)&0xFFFFFFFF);
datain[k] = (uint32_t)(((uint64_t)k * gap) & 0xFFFFFFFF);
for (k = 0; k * SIMDBlockSize < N; ++k) {
int j;
simdmaxmin_length(datain + k * SIMDBlockSize, SIMDBlockSize, &tmin,
Expand Down

0 comments on commit dde3e49

Please sign in to comment.