-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathFillParallel.h
68 lines (63 loc) · 2.2 KB
/
FillParallel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// TODO: A possible improvement is an option that bypasses the CPU cache by using SSE non-temporal (streaming) store instructions, so that filling does not evict existing data from the cache.
// Parallel Fill implementations
#ifndef _ParallelFill_h
#define _ParallelFill_h
#include "Configuration.h"
#include <iostream>
#include <algorithm>
#include <chrono>
#include <random>
#include <ratio>
#include <vector>
#include <thread>
#include <execution>
namespace ParallelAlgorithms
{
/// @brief Fills src[l, r) with value, splitting large ranges into parallel tasks.
///
/// The range is inclusive on the left and exclusive on the right. Ranges with
/// fewer than parallel_threshold elements are filled sequentially with std::fill;
/// larger ranges are halved and both halves are filled recursively through
/// parallel_invoke (Concurrency:: when USE_PPL is defined, tbb:: otherwise).
///
/// @param src                 Base pointer of the array; only src[l] .. src[r - 1] are written.
/// @param value               Value assigned to every element of the range.
/// @param l                   Index of the first element to fill (inclusive).
/// @param r                   Index one past the last element to fill (exclusive).
///                            Nothing is done when r <= l.
/// @param parallel_threshold  Ranges with fewer elements than this are filled sequentially.
///                            Values of 0 and 1 are accepted and behave like 2.
template< class T >
inline void parallel_fill(T* src, T value, size_t l, size_t r, size_t parallel_threshold = 16 * 1024)
{
    if (r <= l)
        return;

    const size_t count = r - l;

    // A one-element range cannot be split: its midpoint equals l, so the right
    // half would be the very same range and the recursion would never terminate
    // when parallel_threshold is 0 or 1. Always handle it sequentially.
    if (count < 2 || count < parallel_threshold)
    {
        std::fill(src + l, src + r, value);     // noted by the original author as many times faster than a for loop
        return;
    }

    const size_t m = l + count / 2;             // midpoint; cannot overflow because r > l

#if defined(USE_PPL)
    Concurrency::parallel_invoke(
#else
    tbb::parallel_invoke(
#endif
        [&] { parallel_fill(src, value, l, m, parallel_threshold); },
        [&] { parallel_fill(src, value, m, r, parallel_threshold); }
    );
}
/// @brief Fills the bytes src[l, r) with value, splitting large ranges into parallel tasks.
///
/// Non-template overload for unsigned char arrays. The range is inclusive on the
/// left and exclusive on the right. Ranges with fewer than parallel_threshold
/// elements are filled sequentially with std::fill; larger ranges are halved and
/// both halves are filled recursively through parallel_invoke (Concurrency:: when
/// USE_PPL is defined, tbb:: otherwise).
///
/// @param src                 Base pointer of the byte array; only src[l] .. src[r - 1] are written.
/// @param value               Byte value assigned to every element of the range.
/// @param l                   Index of the first element to fill (inclusive).
/// @param r                   Index one past the last element to fill (exclusive).
///                            Nothing is done when r <= l.
/// @param parallel_threshold  Ranges with fewer elements than this are filled sequentially.
///                            Values of 0 and 1 are accepted and behave like 2.
inline void parallel_fill(unsigned char* src, unsigned char value, size_t l, size_t r, size_t parallel_threshold = 16 * 1024)
{
    if (r <= l)
        return;

    const size_t count = r - l;

    // A one-element range cannot be split: its midpoint equals l, so the right
    // half would be the very same range and the recursion would never terminate
    // when parallel_threshold is 0 or 1. Always handle it sequentially.
    if (count < 2 || count < parallel_threshold)
    {
        std::fill(src + l, src + r, value);     // noted by the original author as performing the same as memset
        return;
    }

    const size_t m = l + count / 2;             // midpoint; cannot overflow because r > l

#if defined(USE_PPL)
    Concurrency::parallel_invoke(
#else
    tbb::parallel_invoke(
#endif
        [&] { parallel_fill(src, value, l, m, parallel_threshold); },
        [&] { parallel_fill(src, value, m, r, parallel_threshold); }
    );
}
}
#endif