Apollo 10.0
自动驾驶开放平台
util.h
浏览该文件的文档.
/******************************************************************************
 * Copyright 2018 The Apollo Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *****************************************************************************/
16
17#pragma once
18
19#include <fcntl.h> /* low-level i/o */
20#include <malloc.h>
21#include <sys/ioctl.h>
22#include <sys/mman.h>
23#include <sys/stat.h>
24#include <sys/time.h>
25#include <sys/types.h>
26#include <unistd.h>
27#include <cassert>
28#include <cerrno>
29#include <cstdint>
30#include <cstdio>
31#include <cstdlib>
32#include <cstring>
33
34#include <immintrin.h>
35#include <x86intrin.h>
36
37namespace apollo {
38namespace drivers {
39namespace camera {
40
// Declared here, defined out of line. Presumably converts `NumPixels` pixels
// of packed YUYV data at `YUV` into RGB at `RGB` (per the name).
// NOTE(review): required buffer sizes are not visible from this header.
void yuyv2rgb_avx(unsigned char *YUV, unsigned char *RGB, int NumPixels);

// Qualifier for the small SIMD helpers below: `inline` plus the GCC/Clang
// always_inline attribute.
#define SIMD_INLINE inline __attribute__((always_inline))

// Debug helpers that take a 256-bit integer register by value; defined out of
// line. Presumably they print the register as raw bytes, 32-bit lanes and
// 16-bit lanes respectively (per the names).
void print_m256(const __m256i a);
void print_m256_i32(const __m256i a);
void print_m256_i16(const __m256i a);
48
49template <class T>
50SIMD_INLINE char GetChar(T value, size_t index) {
51 return (reinterpret_cast<char *>(&value))[index];
52}
53
// Zero-extend the low 8 / 16 / 32 bits of `a` into an int64_t.
// static_cast converts the whole argument expression, not just its first
// operand as an unparenthesized C-style cast would.
#define SIMD_CHAR_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFF)

#define SIMD_SHORT_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFFFF)

#define SIMD_INT_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFFFFFFFF)

// The SIMD_LL_* macros pack 8-, 16- or 32-bit values into one 64-bit integer,
// first argument in the least significant position.
//   SET1: the same value in every slot.
//   SET2: `a` and `b` alternating (a in even slots, b in odd slots).
//   SETR: one argument per slot, lowest slot first.
// Each expansion is wrapped in parentheses so it stays a single operand when
// used inside a larger expression.
#define SIMD_LL_SET1_EPI8(a)                                             \
  (SIMD_CHAR_AS_LONGLONG(a) | (SIMD_CHAR_AS_LONGLONG(a) << 8) |          \
   (SIMD_CHAR_AS_LONGLONG(a) << 16) | (SIMD_CHAR_AS_LONGLONG(a) << 24) | \
   (SIMD_CHAR_AS_LONGLONG(a) << 32) | (SIMD_CHAR_AS_LONGLONG(a) << 40) | \
   (SIMD_CHAR_AS_LONGLONG(a) << 48) | (SIMD_CHAR_AS_LONGLONG(a) << 56))

#define SIMD_LL_SET2_EPI8(a, b)                                          \
  (SIMD_CHAR_AS_LONGLONG(a) | (SIMD_CHAR_AS_LONGLONG(b) << 8) |          \
   (SIMD_CHAR_AS_LONGLONG(a) << 16) | (SIMD_CHAR_AS_LONGLONG(b) << 24) | \
   (SIMD_CHAR_AS_LONGLONG(a) << 32) | (SIMD_CHAR_AS_LONGLONG(b) << 40) | \
   (SIMD_CHAR_AS_LONGLONG(a) << 48) | (SIMD_CHAR_AS_LONGLONG(b) << 56))

#define SIMD_LL_SETR_EPI8(a, b, c, d, e, f, g, h)                        \
  (SIMD_CHAR_AS_LONGLONG(a) | (SIMD_CHAR_AS_LONGLONG(b) << 8) |          \
   (SIMD_CHAR_AS_LONGLONG(c) << 16) | (SIMD_CHAR_AS_LONGLONG(d) << 24) | \
   (SIMD_CHAR_AS_LONGLONG(e) << 32) | (SIMD_CHAR_AS_LONGLONG(f) << 40) | \
   (SIMD_CHAR_AS_LONGLONG(g) << 48) | (SIMD_CHAR_AS_LONGLONG(h) << 56))

#define SIMD_LL_SET1_EPI16(a)                                      \
  (SIMD_SHORT_AS_LONGLONG(a) | (SIMD_SHORT_AS_LONGLONG(a) << 16) | \
   (SIMD_SHORT_AS_LONGLONG(a) << 32) | (SIMD_SHORT_AS_LONGLONG(a) << 48))

#define SIMD_LL_SET2_EPI16(a, b)                                   \
  (SIMD_SHORT_AS_LONGLONG(a) | (SIMD_SHORT_AS_LONGLONG(b) << 16) | \
   (SIMD_SHORT_AS_LONGLONG(a) << 32) | (SIMD_SHORT_AS_LONGLONG(b) << 48))

#define SIMD_LL_SETR_EPI16(a, b, c, d)                             \
  (SIMD_SHORT_AS_LONGLONG(a) | (SIMD_SHORT_AS_LONGLONG(b) << 16) | \
   (SIMD_SHORT_AS_LONGLONG(c) << 32) | (SIMD_SHORT_AS_LONGLONG(d) << 48))

#define SIMD_LL_SET1_EPI32(a) \
  (SIMD_INT_AS_LONGLONG(a) | (SIMD_INT_AS_LONGLONG(a) << 32))

#define SIMD_LL_SET2_EPI32(a, b) \
  (SIMD_INT_AS_LONGLONG(a) | (SIMD_INT_AS_LONGLONG(b) << 32))
95
// The SIMD_MM256_* macros expand to a brace initializer of four 64-bit values
// for a `__m256i` constant, lowest 64 bits first. This relies on the compiler
// allowing `__m256i` to be brace-initialized from four 64-bit integers.
//   SET1: one value broadcast to every element.
//   SET2: two values alternating across the register.
//   SETR: every element given explicitly, lowest element first.

#define SIMD_MM256_SET1_EPI8(a)                                       \
  {                                                                   \
    SIMD_LL_SET1_EPI8(a), SIMD_LL_SET1_EPI8(a), SIMD_LL_SET1_EPI8(a), \
        SIMD_LL_SET1_EPI8(a)                                          \
  }

#define SIMD_MM256_SET2_EPI8(a0, a1)                      \
  {                                                       \
    SIMD_LL_SET2_EPI8(a0, a1), SIMD_LL_SET2_EPI8(a0, a1), \
        SIMD_LL_SET2_EPI8(a0, a1), SIMD_LL_SET2_EPI8(a0, a1) \
  }

#define SIMD_MM256_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, \
                             ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, \
                             b8, b9, ba, bb, bc, bd, be, bf)                 \
  {                                                                          \
    SIMD_LL_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7),                       \
        SIMD_LL_SETR_EPI8(a8, a9, aa, ab, ac, ad, ae, af),                   \
        SIMD_LL_SETR_EPI8(b0, b1, b2, b3, b4, b5, b6, b7),                   \
        SIMD_LL_SETR_EPI8(b8, b9, ba, bb, bc, bd, be, bf)                    \
  }

#define SIMD_MM256_SET1_EPI16(a)                                         \
  {                                                                      \
    SIMD_LL_SET1_EPI16(a), SIMD_LL_SET1_EPI16(a), SIMD_LL_SET1_EPI16(a), \
        SIMD_LL_SET1_EPI16(a)                                            \
  }

#define SIMD_MM256_SET2_EPI16(a0, a1)                          \
  {                                                            \
    SIMD_LL_SET2_EPI16(a0, a1), SIMD_LL_SET2_EPI16(a0, a1),    \
        SIMD_LL_SET2_EPI16(a0, a1), SIMD_LL_SET2_EPI16(a0, a1) \
  }

#define SIMD_MM256_SETR_EPI16(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, \
                              ac, ad, ae, af)                                 \
  {                                                                           \
    SIMD_LL_SETR_EPI16(a0, a1, a2, a3), SIMD_LL_SETR_EPI16(a4, a5, a6, a7),   \
        SIMD_LL_SETR_EPI16(a8, a9, aa, ab),                                   \
        SIMD_LL_SETR_EPI16(ac, ad, ae, af)                                    \
  }

#define SIMD_MM256_SET1_EPI32(a)                                         \
  {                                                                      \
    SIMD_LL_SET1_EPI32(a), SIMD_LL_SET1_EPI32(a), SIMD_LL_SET1_EPI32(a), \
        SIMD_LL_SET1_EPI32(a)                                            \
  }

#define SIMD_MM256_SET2_EPI32(a0, a1)                          \
  {                                                            \
    SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a0, a1),    \
        SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a0, a1) \
  }

// Eight explicit 32-bit values; each 64-bit element is built from one pair.
#define SIMD_MM256_SETR_EPI32(a0, a1, a2, a3, a4, a5, a6, a7)  \
  {                                                            \
    SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a2, a3),    \
        SIMD_LL_SET2_EPI32(a4, a5), SIMD_LL_SET2_EPI32(a6, a7) \
  }
159
// Byte sizes derived from one 256-bit register.
const size_t A = sizeof(__m256i);  // one register
const size_t DA = 2 * A;           // two registers
const size_t QA = 4 * A;           // four registers
const size_t OA = 8 * A;           // eight registers
const size_t HA = A / 2;           // half a register
165
166const __m256i K_ZERO = SIMD_MM256_SET1_EPI8(0);
167const __m256i K_INV_ZERO = SIMD_MM256_SET1_EPI8(0xFF);
168
169const __m256i K8_01 = SIMD_MM256_SET1_EPI8(0x01);
170const __m256i K8_02 = SIMD_MM256_SET1_EPI8(0x02);
171const __m256i K8_04 = SIMD_MM256_SET1_EPI8(0x04);
172const __m256i K8_08 = SIMD_MM256_SET1_EPI8(0x08);
173const __m256i K8_10 = SIMD_MM256_SET1_EPI8(0x10);
174const __m256i K8_20 = SIMD_MM256_SET1_EPI8(0x20);
175const __m256i K8_40 = SIMD_MM256_SET1_EPI8(0x40);
176const __m256i K8_80 = SIMD_MM256_SET1_EPI8(0x80);
177
178const __m256i K8_01_FF = SIMD_MM256_SET2_EPI8(0x01, 0xFF);
179
180const __m256i K16_0001 = SIMD_MM256_SET1_EPI16(0x0001);
181const __m256i K16_0002 = SIMD_MM256_SET1_EPI16(0x0002);
182const __m256i K16_0003 = SIMD_MM256_SET1_EPI16(0x0003);
183const __m256i K16_0004 = SIMD_MM256_SET1_EPI16(0x0004);
184const __m256i K16_0005 = SIMD_MM256_SET1_EPI16(0x0005);
185const __m256i K16_0006 = SIMD_MM256_SET1_EPI16(0x0006);
186const __m256i K16_0008 = SIMD_MM256_SET1_EPI16(0x0008);
187const __m256i K16_0010 = SIMD_MM256_SET1_EPI16(0x0010);
188const __m256i K16_0018 = SIMD_MM256_SET1_EPI16(0x0018);
189const __m256i K16_0020 = SIMD_MM256_SET1_EPI16(0x0020);
190const __m256i K16_0080 = SIMD_MM256_SET1_EPI16(0x0080);
191const __m256i K16_00FF = SIMD_MM256_SET1_EPI16(0x00FF);
192const __m256i K16_FF00 = SIMD_MM256_SET1_EPI16(0xFF00);
193
194const __m256i K32_00000001 = SIMD_MM256_SET1_EPI32(0x00000001);
195const __m256i K32_00000002 = SIMD_MM256_SET1_EPI32(0x00000002);
196const __m256i K32_00000004 = SIMD_MM256_SET1_EPI32(0x00000004);
197const __m256i K32_00000008 = SIMD_MM256_SET1_EPI32(0x00000008);
198const __m256i K32_000000FF = SIMD_MM256_SET1_EPI32(0x000000FF);
199const __m256i K32_0000FFFF = SIMD_MM256_SET1_EPI32(0x0000FFFF);
200const __m256i K32_00010000 = SIMD_MM256_SET1_EPI32(0x00010000);
201const __m256i K32_01000000 = SIMD_MM256_SET1_EPI32(0x01000000);
202const __m256i K32_FFFFFF00 = SIMD_MM256_SET1_EPI32(0xFFFFFF00);
203
205 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206 -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1);
208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, 0x0,
209 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
211 -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1,
212 -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD);
213
215 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1, -1, -1, -1, -1, -1, -1, -1, 0x0,
216 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1, -1, -1, -1, -1, -1, -1, -1);
218 -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1,
219 -1, -1, -1, -1, -1, -1, -1, 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe);
220
222 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1,
223 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
225 -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
226 -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1);
228 -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1,
229 -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1);
230
232 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1,
233 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd);
234const __m256i U_SHUFFLE4 =
235 SIMD_MM256_SETR_EPI8(0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
236 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x0,
237 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0);
238
240 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3,
241 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
243 -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
244 -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1);
246 -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1,
247 -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1);
248
250 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1,
251 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf);
252
254 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1,
255 -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1);
257 -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8,
258 -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD);
260 -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1,
261 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1, -1);
262
264 -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5,
265 -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA);
267 -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1,
268 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1);
270 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1,
271 -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1);
272
274 -1, -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1,
275 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1);
277 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1,
278 -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1);
280 -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA,
281 -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF);
282
284 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
285 -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1);
287 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, 0x1,
288 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
290 -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1,
291 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE);
292
294 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
295 -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1);
297 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, 0x2,
298 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
300 -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1,
301 -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF);
302
305const int Y_ADJUST = 0;
306const int UV_ADJUST = 128;
308const int YUV_TO_BGR_ROUND_TERM = 0; // 1 << (YUV_TO_BGR_AVERAGING_SHIFT);
310 static_cast<int>((((1 << YUV_TO_BGR_AVERAGING_SHIFT))));
312 static_cast<int>((2.041 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
314 -static_cast<int>((0.3455 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
316 -static_cast<int>((0.7169 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
318 static_cast<int>((1.4065 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
319
320const __m256i K16_YRGB_RT =
323const __m256i K16_UG_VG =
326
327template <bool align>
328SIMD_INLINE __m256i Load(const __m256i *p);
329
330template <>
331SIMD_INLINE __m256i Load<false>(const __m256i *p) {
332 return _mm256_loadu_si256(p);
333}
334
335template <>
336SIMD_INLINE __m256i Load<true>(const __m256i *p) {
337 return _mm256_load_si256(p);
338}
339
340SIMD_INLINE void *AlignLo(const void *ptr, size_t align) {
341 return reinterpret_cast<void *>(((size_t)ptr) & ~(align - 1));
342}
343
344SIMD_INLINE bool Aligned(const void *ptr, size_t align = sizeof(__m256)) {
345 return ptr == AlignLo(ptr, align);
346}
347
348template <bool align>
349SIMD_INLINE void Store(__m256i *p, __m256i a);
350
351template <>
352SIMD_INLINE void Store<false>(__m256i *p, __m256i a) {
353 _mm256_storeu_si256(p, a);
354}
355
356template <>
357SIMD_INLINE void Store<true>(__m256i *p, __m256i a) {
358 _mm256_store_si256(p, a);
359}
360
361SIMD_INLINE __m256i SaturateI16ToU8(__m256i value) {
362 return _mm256_min_epi16(K16_00FF, _mm256_max_epi16(value, K_ZERO));
363}
364
365SIMD_INLINE __m256i AdjustY16(__m256i y16) {
366 return _mm256_subs_epi16(y16, K16_Y_ADJUST);
367}
368
369SIMD_INLINE __m256i AdjustUV16(__m256i uv16) {
370 return _mm256_subs_epi16(uv16, K16_UV_ADJUST);
371}
372
373SIMD_INLINE __m256i AdjustedYuvToRed32(__m256i y16_1, __m256i v16_0) {
374 // print_m256_i16(y16_1);
375 // print_m256_i16(v16_0);
376 // print_m256_i32(_mm256_madd_epi16(y16_1, K16_YRGB_RT));
377 // print_m256_i32(_mm256_madd_epi16(v16_0, K16_VR_0));
378 return _mm256_srai_epi32(
379 _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
380 _mm256_madd_epi16(v16_0, K16_VR_0)),
382}
383
384SIMD_INLINE __m256i AdjustedYuvToRed16(__m256i y16, __m256i v16) {
385 // print_m256_i32(AdjustedYuvToRed32(_mm256_unpacklo_epi16(y16, K16_0001),
386 // _mm256_unpacklo_epi16(v16, K_ZERO)));
387 // print_m256_i16(_mm256_unpacklo_epi16(y16, K16_0001));
388 return SaturateI16ToU8(_mm256_packs_epi32(
389 AdjustedYuvToRed32(_mm256_unpacklo_epi16(y16, K16_0001),
390 _mm256_unpacklo_epi16(v16, K_ZERO)),
391 AdjustedYuvToRed32(_mm256_unpackhi_epi16(y16, K16_0001),
392 _mm256_unpackhi_epi16(v16, K_ZERO))));
393}
394
395SIMD_INLINE __m256i AdjustedYuvToGreen32(__m256i y16_1, __m256i u16_v16) {
396 return _mm256_srai_epi32(
397 _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
398 _mm256_madd_epi16(u16_v16, K16_UG_VG)),
400}
401
402SIMD_INLINE __m256i AdjustedYuvToGreen16(__m256i y16, __m256i u16,
403 __m256i v16) {
404 return SaturateI16ToU8(_mm256_packs_epi32(
405 AdjustedYuvToGreen32(_mm256_unpacklo_epi16(y16, K16_0001),
406 _mm256_unpacklo_epi16(u16, v16)),
407 AdjustedYuvToGreen32(_mm256_unpackhi_epi16(y16, K16_0001),
408 _mm256_unpackhi_epi16(u16, v16))));
409}
410
411SIMD_INLINE __m256i AdjustedYuvToBlue32(__m256i y16_1, __m256i u16_0) {
412 return _mm256_srai_epi32(
413 _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
414 _mm256_madd_epi16(u16_0, K16_UB_0)),
416}
417
418SIMD_INLINE __m256i AdjustedYuvToBlue16(__m256i y16, __m256i u16) {
419 return SaturateI16ToU8(_mm256_packs_epi32(
420 AdjustedYuvToBlue32(_mm256_unpacklo_epi16(y16, K16_0001),
421 _mm256_unpacklo_epi16(u16, K_ZERO)),
422 AdjustedYuvToBlue32(_mm256_unpackhi_epi16(y16, K16_0001),
423 _mm256_unpackhi_epi16(u16, K_ZERO))));
424}
425
426SIMD_INLINE __m256i YuvToRed(__m256i y, __m256i v) {
427 __m256i lo = AdjustedYuvToRed16(_mm256_unpacklo_epi8(y, K_ZERO),
428 AdjustUV16(_mm256_unpacklo_epi8(v, K_ZERO)));
429 __m256i hi = AdjustedYuvToRed16((_mm256_unpackhi_epi8(y, K_ZERO)),
430 AdjustUV16(_mm256_unpackhi_epi8(v, K_ZERO)));
431
432 // print_m256_i16(lo);
433 // print_m256_i16(hi);
434 return _mm256_packus_epi16(lo, hi);
435}
436
437SIMD_INLINE __m256i YuvToGreen(__m256i y, __m256i u, __m256i v) {
438 __m256i lo =
439 AdjustedYuvToGreen16((_mm256_unpacklo_epi8(y, K_ZERO)),
440 AdjustUV16(_mm256_unpacklo_epi8(u, K_ZERO)),
441 AdjustUV16(_mm256_unpacklo_epi8(v, K_ZERO)));
442 __m256i hi =
443 AdjustedYuvToGreen16((_mm256_unpackhi_epi8(y, K_ZERO)),
444 AdjustUV16(_mm256_unpackhi_epi8(u, K_ZERO)),
445 AdjustUV16(_mm256_unpackhi_epi8(v, K_ZERO)));
446 return _mm256_packus_epi16(lo, hi);
447}
448
449SIMD_INLINE __m256i YuvToBlue(__m256i y, __m256i u) {
450 __m256i lo = AdjustedYuvToBlue16((_mm256_unpacklo_epi8(y, K_ZERO)),
451 AdjustUV16(_mm256_unpacklo_epi8(u, K_ZERO)));
452 __m256i hi = AdjustedYuvToBlue16((_mm256_unpackhi_epi8(y, K_ZERO)),
453 AdjustUV16(_mm256_unpackhi_epi8(u, K_ZERO)));
454 return _mm256_packus_epi16(lo, hi);
455}
456
457template <int index>
458__m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red);
459
460template <>
461SIMD_INLINE __m256i InterleaveBgr<0>(__m256i blue, __m256i green, __m256i red) {
462 return _mm256_or_si256(
463 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x44),
465 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x44),
467 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x44),
469}
470
471template <>
472SIMD_INLINE __m256i InterleaveBgr<1>(__m256i blue, __m256i green, __m256i red) {
473 return _mm256_or_si256(
474 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x99),
476 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x99),
478 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x99),
480}
481
482template <>
483SIMD_INLINE __m256i InterleaveBgr<2>(__m256i blue, __m256i green, __m256i red) {
484 return _mm256_or_si256(
485 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0xEE),
487 _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0xEE),
489 _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0xEE),
491}
492
493SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3]) {
494 __m256i b0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE);
495 __m256i b2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE);
496 return _mm256_or_si256(
497 _mm256_permute2x128_si256(b0, b2, 0x20),
498 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE),
499 _mm256_permute2x128_si256(b0, b2, 0x31)));
500}
501
502SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3]) {
503 __m256i g0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN);
504 __m256i g2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN);
505 return _mm256_or_si256(
506 _mm256_permute2x128_si256(g0, g2, 0x20),
507 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN),
508 _mm256_permute2x128_si256(g0, g2, 0x31)));
509}
510
511SIMD_INLINE __m256i BgrToRed(__m256i bgr[3]) {
512 __m256i r0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED);
513 __m256i r2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED);
514 return _mm256_or_si256(
515 _mm256_permute2x128_si256(r0, r2, 0x20),
516 _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED),
517 _mm256_permute2x128_si256(r0, r2, 0x31)));
518}
519
520template <bool align>
521SIMD_INLINE __m256i LoadPermuted(const __m256i *p) {
522 return _mm256_permute4x64_epi64(Load<align>(p), 0xD8);
523}
524
525} // namespace camera
526} // namespace drivers
527} // namespace apollo
#define SIMD_MM256_SET1_EPI16(a)
Definition util.h:119
#define SIMD_MM256_SET2_EPI8(a0, a1)
Definition util.h:102
#define SIMD_MM256_SET1_EPI32(a)
Definition util.h:140
#define SIMD_MM256_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)
Definition util.h:109
#define SIMD_MM256_SET1_EPI8(a)
Definition util.h:96
#define SIMD_MM256_SET2_EPI16(a0, a1)
Definition util.h:125
#define SIMD_INLINE
Definition util.h:43
const int U_TO_GREEN_WEIGHT
Definition util.h:313
const size_t A
Definition util.h:160
const __m256i K16_FF00
Definition util.h:192
SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3])
Definition util.h:502
const __m256i K32_00000004
Definition util.h:196
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR2
Definition util.h:279
const __m256i K8_SHUFFLE_BGR1_TO_BLUE
Definition util.h:207
const __m256i K16_0010
Definition util.h:187
const __m256i K8_10
Definition util.h:173
const __m256i K16_0004
Definition util.h:183
const __m256i K16_0005
Definition util.h:184
const __m256i K16_YRGB_RT
Definition util.h:320
const size_t OA
Definition util.h:163
const __m256i K8_04
Definition util.h:171
void print_m256(__m256i a)
Definition util.cc:26
const size_t QA
Definition util.h:162
const __m256i K16_0001
Definition util.h:180
const int Y_TO_RGB_WEIGHT
Definition util.h:309
const __m256i K16_UG_VG
Definition util.h:323
const __m256i Y_SHUFFLE0
Definition util.h:214
const __m256i V_SHUFFLE3
Definition util.h:249
const __m256i K8_SHUFFLE_BGR1_TO_RED
Definition util.h:296
SIMD_INLINE __m256i YuvToRed(__m256i y, __m256i v)
Definition util.h:426
const __m256i K8_01
Definition util.h:169
SIMD_INLINE __m256i InterleaveBgr< 2 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:483
const __m256i U_SHUFFLE1
Definition util.h:224
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0
Definition util.h:253
const __m256i K8_80
Definition util.h:176
void print_m256_i16(const __m256i a)
Definition util.cc:49
const __m256i K16_0008
Definition util.h:186
SIMD_INLINE __m256i SaturateI16ToU8(__m256i value)
Definition util.h:361
const __m256i K32_00010000
Definition util.h:200
const __m256i K16_00FF
Definition util.h:191
const int YUV_TO_BGR_AVERAGING_SHIFT
Definition util.h:307
const __m256i K16_VR_0
Definition util.h:322
SIMD_INLINE void Store< true >(__m256i *p, __m256i a)
Definition util.h:357
SIMD_INLINE __m256i AdjustY16(__m256i y16)
Definition util.h:365
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1
Definition util.h:256
SIMD_INLINE __m256i BgrToRed(__m256i bgr[3])
Definition util.h:511
SIMD_INLINE __m256i YuvToBlue(__m256i y, __m256i u)
Definition util.h:449
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2
Definition util.h:269
SIMD_INLINE __m256i Load< false >(const __m256i *p)
Definition util.h:331
SIMD_INLINE __m256i AdjustUV16(__m256i uv16)
Definition util.h:369
const __m256i K16_UV_ADJUST
Definition util.h:304
const size_t DA
Definition util.h:161
const __m256i V_SHUFFLE1
Definition util.h:242
SIMD_INLINE __m256i AdjustedYuvToBlue16(__m256i y16, __m256i u16)
Definition util.h:418
const __m256i K32_00000008
Definition util.h:197
const int UV_ADJUST
Definition util.h:306
const __m256i U_SHUFFLE2
Definition util.h:227
const __m256i V_SHUFFLE2
Definition util.h:245
const __m256i K16_0080
Definition util.h:190
const int V_TO_RED_WEIGHT
Definition util.h:317
SIMD_INLINE char GetChar(T value, size_t index)
Definition util.h:50
const __m256i K8_02
Definition util.h:170
const __m256i K8_40
Definition util.h:175
void print_m256_i32(const __m256i a)
Definition util.cc:37
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2
Definition util.h:259
SIMD_INLINE __m256i AdjustedYuvToRed16(__m256i y16, __m256i v16)
Definition util.h:384
const __m256i K32_00000002
Definition util.h:195
SIMD_INLINE void Store(__m256i *p, __m256i a)
SIMD_INLINE __m256i AdjustedYuvToBlue32(__m256i y16_1, __m256i u16_0)
Definition util.h:411
const __m256i K16_0018
Definition util.h:188
const __m256i K8_SHUFFLE_BGR0_TO_RED
Definition util.h:293
SIMD_INLINE __m256i InterleaveBgr< 1 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:472
const int V_TO_GREEN_WEIGHT
Definition util.h:315
SIMD_INLINE __m256i AdjustedYuvToGreen32(__m256i y16_1, __m256i u16_v16)
Definition util.h:395
const __m256i K8_SHUFFLE_BGR2_TO_RED
Definition util.h:299
const __m256i Y_SHUFFLE1
Definition util.h:217
const __m256i K16_Y_ADJUST
Definition util.h:303
SIMD_INLINE __m256i AdjustedYuvToGreen16(__m256i y16, __m256i u16, __m256i v16)
Definition util.h:402
const __m256i K8_SHUFFLE_BGR0_TO_BLUE
Definition util.h:204
const __m256i K8_SHUFFLE_BGR2_TO_GREEN
Definition util.h:289
SIMD_INLINE void * AlignLo(const void *ptr, size_t align)
Definition util.h:340
SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3])
Definition util.h:493
const size_t HA
Definition util.h:164
const __m256i K16_0006
Definition util.h:185
const __m256i V_SHUFFLE0
Definition util.h:239
const int YUV_TO_BGR_ROUND_TERM
Definition util.h:308
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR1
Definition util.h:276
SIMD_INLINE __m256i AdjustedYuvToRed32(__m256i y16_1, __m256i v16_0)
Definition util.h:373
const int U_TO_BLUE_WEIGHT
Definition util.h:311
const int Y_ADJUST
Definition util.h:305
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR0
Definition util.h:273
const __m256i U_SHUFFLE4
Definition util.h:234
const __m256i K_INV_ZERO
Definition util.h:167
__m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red)
const __m256i K32_01000000
Definition util.h:201
SIMD_INLINE __m256i YuvToGreen(__m256i y, __m256i u, __m256i v)
Definition util.h:437
const __m256i K8_01_FF
Definition util.h:178
const __m256i K8_SHUFFLE_BGR1_TO_GREEN
Definition util.h:286
SIMD_INLINE __m256i InterleaveBgr< 0 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:461
const __m256i K16_0003
Definition util.h:182
const __m256i K_ZERO
Definition util.h:166
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1
Definition util.h:266
const __m256i K32_00000001
Definition util.h:194
const __m256i U_SHUFFLE3
Definition util.h:231
SIMD_INLINE void Store< false >(__m256i *p, __m256i a)
Definition util.h:352
const __m256i K32_0000FFFF
Definition util.h:199
const __m256i K8_08
Definition util.h:172
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0
Definition util.h:263
SIMD_INLINE __m256i Load(const __m256i *p)
const __m256i K32_000000FF
Definition util.h:198
void yuyv2rgb_avx(unsigned char *YUV, unsigned char *RGB, int NumPixels)
Definition util.cc:133
const __m256i K8_20
Definition util.h:174
const __m256i K8_SHUFFLE_BGR0_TO_GREEN
Definition util.h:283
SIMD_INLINE __m256i LoadPermuted(const __m256i *p)
Definition util.h:521
SIMD_INLINE __m256i Load< true >(const __m256i *p)
Definition util.h:336
const __m256i K16_0020
Definition util.h:189
const __m256i K32_FFFFFF00
Definition util.h:202
const __m256i K16_UB_0
Definition util.h:325
const __m256i K8_SHUFFLE_BGR2_TO_BLUE
Definition util.h:210
const __m256i U_SHUFFLE0
Definition util.h:221
SIMD_INLINE bool Aligned(const void *ptr, size_t align=sizeof(__m256))
Definition util.h:344
const __m256i K16_0002
Definition util.h:181
class register implement
Definition arena_queue.h:37