-
Notifications
You must be signed in to change notification settings - Fork 2
/
KoStreamedMath.h
200 lines (171 loc) · 6.06 KB
/
KoStreamedMath.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
* Copyright (c) 2012 Dmitry Kazakov <[email protected]>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __KOSTREAMED_MATH_H
#define __KOSTREAMED_MATH_H
#if defined _MSC_VER
// Silence the "possible loss of data" (C4244) and "forcing value to bool 'true' or 'false'" (C4800) warnings while the Vc headers are included
#pragma warning ( push )
#pragma warning ( disable : 4244 )
#pragma warning ( disable : 4800 )
#endif
#include <Vc/Vc>
#include <Vc/IO>
#if defined _MSC_VER
#pragma warning ( pop )
#endif
#include <stdint.h>
#include <cstring>
#include <iostream>
#include <KoAlwaysInline.h>
#define BLOCKDEBUG 0
#if !defined _MSC_VER
#pragma GCC diagnostic ignored "-Wcast-align"
#endif
struct KoStreamedMath {
using int_v = Vc::SimdArray<int, Vc::float_v::size()>;
using uint_v = Vc::SimdArray<unsigned int, Vc::float_v::size()>;
static inline quint8 round_float_to_uint(float value) {
return quint8(value + float(0.5));
}
static inline quint8 lerp_mixed_u8_float(quint8 a, quint8 b, float alpha) {
return round_float_to_uint(qint16(b - a) * alpha + a);
}
/**
* Get a vector containing first Vc::float_v::size() values of mask.
* Each source mask element is considered to be a 8-bit integer
*/
static inline Vc::float_v fetch_mask_8(const quint8 *data) {
uint_v data_i(data);
return Vc::simd_cast<Vc::float_v>(int_v(data_i));
}
/**
* Get an alpha values from Vc::float_v::size() pixels 32-bit each
* (4 channels, 8 bit per channel). The alpha value is considered
* to be stored in the most significant byte of the pixel
*
* \p aligned controls whether the \p data is fetched using aligned
* instruction or not.
* 1) Fetching aligned data with unaligned instruction
* degrades performance.
* 2) Fetching unaligned data with aligned instruction
* causes \#GP (General Protection Exception)
*/
template <bool aligned>
static inline Vc::float_v fetch_alpha_32(const quint8 *data) {
uint_v data_i;
if (aligned) {
data_i.load((const quint32*)data, Vc::Aligned);
} else {
data_i.load((const quint32*)data, Vc::Unaligned);
}
return Vc::simd_cast<Vc::float_v>(int_v(data_i >> 24));
}
/**
* Get color values from Vc::float_v::size() pixels 32-bit each
* (4 channels, 8 bit per channel). The color data is considered
* to be stored in the 3 least significant bytes of the pixel.
*
* \p aligned controls whether the \p data is fetched using aligned
* instruction or not.
* 1) Fetching aligned data with unaligned instruction
* degrades performance.
* 2) Fetching unaligned data with aligned instruction
* causes \#GP (General Protection Exception)
*/
template <bool aligned>
static inline void fetch_colors_32(const quint8 *data,
Vc::float_v &c1,
Vc::float_v &c2,
Vc::float_v &c3) {
int_v data_i;
if (aligned) {
data_i.load((const quint32*)data, Vc::Aligned);
} else {
data_i.load((const quint32*)data, Vc::Unaligned);
}
const quint32 lowByteMask = 0xFF;
uint_v mask(lowByteMask);
c1 = Vc::simd_cast<Vc::float_v>(int_v((data_i >> 16) & mask));
c2 = Vc::simd_cast<Vc::float_v>(int_v((data_i >> 8) & mask));
c3 = Vc::simd_cast<Vc::float_v>(int_v( data_i & mask));
}
/**
* Pack color and alpha values to Vc::float_v::size() pixels 32-bit each
* (4 channels, 8 bit per channel). The color data is considered
* to be stored in the 3 least significant bytes of the pixel, alpha -
* in the most significant byte
*
* NOTE: \p data must be aligned pointer!
*/
static inline void write_channels_32(quint8 *data,
Vc::float_v::AsArg alpha,
Vc::float_v::AsArg c1,
Vc::float_v::AsArg c2,
Vc::float_v::AsArg c3) {
/**
* FIXME: make conversion float->int
* use methematical rounding
*/
const quint32 lowByteMask = 0xFF;
// FIXME: Use single-instruction rounding + conversion
// The achieve that we need to implement Vc::iRound()
uint_v mask(lowByteMask);
uint_v v1 = uint_v(int_v(Vc::round(alpha))) << 24;
uint_v v2 = (uint_v(int_v(Vc::round(c1))) & mask) << 16;
uint_v v3 = (uint_v(int_v(Vc::round(c2))) & mask) << 8;
uint_v v4 = uint_v(int_v(Vc::round(c3))) & mask;
v1 = v1 | v2;
v3 = v3 | v4;
(v1 | v3).store((quint32*)data, Vc::Aligned);
}
};
namespace KoStreamedMathFunctions {

/**
 * Set all bytes of one pixel at \p dst to zero.
 *
 * \p pixelSize is the size of the pixel in bytes. Only the
 * specializations defined below (4 and 16 bytes) are available.
 *
 * The implementations use fixed-size std::memset() instead of storing
 * through a quint32/quint64 pointer obtained by casting \p dst, so they
 * make no assumption about the alignment or the declared type of the
 * memory behind \p dst.
 */
template<int pixelSize>
ALWAYS_INLINE void clearPixel(quint8* dst);

/// Zero a 4-byte pixel
template<>
ALWAYS_INLINE void clearPixel<4>(quint8* dst)
{
    std::memset(dst, 0, 4);
}

/// Zero a 16-byte pixel
template<>
ALWAYS_INLINE void clearPixel<16>(quint8* dst)
{
    std::memset(dst, 0, 16);
}

/**
 * Copy one pixel from \p src to \p dst.
 *
 * \p pixelSize is the size of the pixel in bytes. Only the
 * specializations defined below (4 and 16 bytes) are available.
 *
 * The pixel is first read completely into a local buffer and then
 * written out, both with fixed-size std::memcpy(). This avoids
 * dereferencing casted quint32/quint64 pointers (alignment and
 * aliasing assumptions) and stays well-defined when \p src and
 * \p dst point to the same pixel.
 */
template<int pixelSize>
ALWAYS_INLINE void copyPixel(const quint8 *src, quint8* dst);

/// Copy a 4-byte pixel
template<>
ALWAYS_INLINE void copyPixel<4>(const quint8 *src, quint8* dst)
{
    quint8 pixel[4];
    std::memcpy(pixel, src, sizeof(pixel));
    std::memcpy(dst, pixel, sizeof(pixel));
}

/// Copy a 16-byte pixel
template<>
ALWAYS_INLINE void copyPixel<16>(const quint8 *src, quint8* dst)
{
    quint8 pixel[16];
    std::memcpy(pixel, src, sizeof(pixel));
    std::memcpy(dst, pixel, sizeof(pixel));
}
}
#endif /* __KOSTREAMED_MATH_H */