Apollo 10.0
自动驾驶开放平台
util.cc
浏览该文件的文档.
1/******************************************************************************
2 * Copyright 2018 The Apollo Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *****************************************************************************/
16
18
19#include <cstdarg>
20#include <cstdint>
21
22namespace apollo {
23namespace drivers {
24namespace camera {
25
26void print_m256(__m256i a) {
27 unsigned char snoop[32];
28 bool dst_align = Aligned(reinterpret_cast<void*>(snoop));
29 if (dst_align)
30 Store<true>(reinterpret_cast<__m256i*>(snoop), a);
31 else
32 Store<false>(reinterpret_cast<__m256i*>(snoop), a);
33 for (int i = 0; i < 32; ++i) {
34 printf("DEBUG8 %d %u \n", i, snoop[i]);
35 }
36}
37void print_m256_i32(const __m256i a) {
38 unsigned int snoop[8];
39 bool dst_align = Aligned(reinterpret_cast<void*>(snoop));
40 if (dst_align)
41 Store<true>(reinterpret_cast<__m256i*>(snoop), a);
42 else
43 Store<false>(reinterpret_cast<__m256i*>(snoop), a);
44 for (int i = 0; i < 8; ++i) {
45 printf("DEBUG32 %d %u \n", i, snoop[i]);
46 }
47}
48
49void print_m256_i16(const __m256i a) {
50 uint16_t snoop[16];
51 bool dst_align = Aligned(reinterpret_cast<void*>(snoop));
52 if (dst_align)
53 Store<true>(reinterpret_cast<__m256i*>(snoop), a);
54 else
55 Store<false>(reinterpret_cast<__m256i*>(snoop), a);
56 for (int i = 0; i < 16; ++i) {
57 printf("DEBUG16 %d %u \n", i, snoop[i]);
58 }
59}
60
61template <bool align>
62SIMD_INLINE void yuv_separate_avx2(uint8_t* y, __m256i* y0, __m256i* y1,
63 __m256i* u0, __m256i* v0) {
64 __m256i yuv_m256[4];
65
66 if (align) {
67 yuv_m256[0] = Load<true>(reinterpret_cast<__m256i*>(y));
68 yuv_m256[1] = Load<true>(reinterpret_cast<__m256i*>(y) + 1);
69 yuv_m256[2] = Load<true>(reinterpret_cast<__m256i*>(y) + 2);
70 yuv_m256[3] = Load<true>(reinterpret_cast<__m256i*>(y) + 3);
71 } else {
72 yuv_m256[0] = Load<false>(reinterpret_cast<__m256i*>(y));
73 yuv_m256[1] = Load<false>(reinterpret_cast<__m256i*>(y) + 1);
74 yuv_m256[2] = Load<false>(reinterpret_cast<__m256i*>(y) + 2);
75 yuv_m256[3] = Load<false>(reinterpret_cast<__m256i*>(y) + 3);
76 }
77
78 *y0 =
79 _mm256_or_si256(_mm256_permute4x64_epi64(
80 _mm256_shuffle_epi8(yuv_m256[0], Y_SHUFFLE0), 0xD8),
81 _mm256_permute4x64_epi64(
82 _mm256_shuffle_epi8(yuv_m256[1], Y_SHUFFLE1), 0xD8));
83 *y1 =
84 _mm256_or_si256(_mm256_permute4x64_epi64(
85 _mm256_shuffle_epi8(yuv_m256[2], Y_SHUFFLE0), 0xD8),
86 _mm256_permute4x64_epi64(
87 _mm256_shuffle_epi8(yuv_m256[3], Y_SHUFFLE1), 0xD8));
88
89 *u0 = _mm256_permutevar8x32_epi32(
90 _mm256_or_si256(
91 _mm256_or_si256(_mm256_shuffle_epi8(yuv_m256[0], U_SHUFFLE0),
92 _mm256_shuffle_epi8(yuv_m256[1], U_SHUFFLE1)),
93 _mm256_or_si256(_mm256_shuffle_epi8(yuv_m256[2], U_SHUFFLE2),
94 _mm256_shuffle_epi8(yuv_m256[3], U_SHUFFLE3))),
96 *v0 = _mm256_permutevar8x32_epi32(
97 _mm256_or_si256(
98 _mm256_or_si256(_mm256_shuffle_epi8(yuv_m256[0], V_SHUFFLE0),
99 _mm256_shuffle_epi8(yuv_m256[1], V_SHUFFLE1)),
100 _mm256_or_si256(_mm256_shuffle_epi8(yuv_m256[2], V_SHUFFLE2),
101 _mm256_shuffle_epi8(yuv_m256[3], V_SHUFFLE3))),
102 U_SHUFFLE4);
103}
104
105template <bool align>
106void yuv2rgb_avx2(__m256i y0, __m256i u0, __m256i v0, uint8_t* rgb) {
107 __m256i r0 = YuvToRed(y0, v0);
108 __m256i g0 = YuvToGreen(y0, u0, v0);
109 __m256i b0 = YuvToBlue(y0, u0);
110
111 Store<align>(reinterpret_cast<__m256i*>(rgb) + 0,
112 InterleaveBgr<0>(r0, g0, b0));
113 Store<align>(reinterpret_cast<__m256i*>(rgb) + 1,
114 InterleaveBgr<1>(r0, g0, b0));
115 Store<align>(reinterpret_cast<__m256i*>(rgb) + 2,
116 InterleaveBgr<2>(r0, g0, b0));
117}
118
119template <bool align>
120void yuv2rgb_avx2(uint8_t* yuv, uint8_t* rgb) {
121 __m256i y0, y1, u0, v0;
122
123 yuv_separate_avx2<align>(yuv, &y0, &y1, &u0, &v0);
124 __m256i u0_u0 = _mm256_permute4x64_epi64(u0, 0xD8);
125 __m256i v0_v0 = _mm256_permute4x64_epi64(v0, 0xD8);
126 yuv2rgb_avx2<align>(y0, _mm256_unpacklo_epi8(u0_u0, u0_u0),
127 _mm256_unpacklo_epi8(v0_v0, v0_v0), rgb);
128 yuv2rgb_avx2<align>(y1, _mm256_unpackhi_epi8(u0_u0, u0_u0),
129 _mm256_unpackhi_epi8(v0_v0, v0_v0),
130 rgb + 3 * sizeof(__m256i));
131}
132
133void yuyv2rgb_avx(unsigned char* YUV, unsigned char* RGB, int NumPixels) {
134 assert(NumPixels == (1920 * 1080));
135 bool align = Aligned(YUV) & Aligned(RGB);
136 uint8_t* yuv_offset = YUV;
137 uint8_t* rgb_offset = RGB;
138 if (align) {
139 for (int i = 0; i < NumPixels;
140 i = i + static_cast<int>(2 * sizeof(__m256i)),
141 yuv_offset += 4 * static_cast<int>(sizeof(__m256i)),
142 rgb_offset += 6 * static_cast<int>(sizeof(__m256i))) {
143 yuv2rgb_avx2<true>(yuv_offset, rgb_offset);
144 }
145 } else {
146 for (int i = 0; i < NumPixels;
147 i = i + static_cast<int>(2 * sizeof(__m256i)),
148 yuv_offset += 4 * static_cast<int>(sizeof(__m256i)),
149 rgb_offset += 6 * static_cast<int>(sizeof(__m256i))) {
150 yuv2rgb_avx2<false>(yuv_offset, rgb_offset);
151 }
152 }
153}
154
155} // namespace camera
156} // namespace drivers
157} // namespace apollo
#define SIMD_INLINE
Definition util.h:43
void print_m256(__m256i a)
Definition util.cc:26
const __m256i Y_SHUFFLE0
Definition util.h:214
const __m256i V_SHUFFLE3
Definition util.h:249
SIMD_INLINE __m256i YuvToRed(__m256i y, __m256i v)
Definition util.h:426
SIMD_INLINE __m256i InterleaveBgr< 2 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:483
const __m256i U_SHUFFLE1
Definition util.h:224
void print_m256_i16(const __m256i a)
Definition util.cc:49
void yuv2rgb_avx2(__m256i y0, __m256i u0, __m256i v0, uint8_t *rgb)
Definition util.cc:106
SIMD_INLINE void Store< true >(__m256i *p, __m256i a)
Definition util.h:357
SIMD_INLINE __m256i YuvToBlue(__m256i y, __m256i u)
Definition util.h:449
SIMD_INLINE __m256i Load< false >(const __m256i *p)
Definition util.h:331
const __m256i V_SHUFFLE1
Definition util.h:242
const __m256i U_SHUFFLE2
Definition util.h:227
const __m256i V_SHUFFLE2
Definition util.h:245
void print_m256_i32(const __m256i a)
Definition util.cc:37
SIMD_INLINE __m256i InterleaveBgr< 1 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:472
const __m256i Y_SHUFFLE1
Definition util.h:217
const __m256i V_SHUFFLE0
Definition util.h:239
const __m256i U_SHUFFLE4
Definition util.h:234
SIMD_INLINE void yuv_separate_avx2(uint8_t *y, __m256i *y0, __m256i *y1, __m256i *u0, __m256i *v0)
Definition util.cc:62
SIMD_INLINE __m256i YuvToGreen(__m256i y, __m256i u, __m256i v)
Definition util.h:437
SIMD_INLINE __m256i InterleaveBgr< 0 >(__m256i blue, __m256i green, __m256i red)
Definition util.h:461
const __m256i U_SHUFFLE3
Definition util.h:231
SIMD_INLINE void Store< false >(__m256i *p, __m256i a)
Definition util.h:352
void yuyv2rgb_avx(unsigned char *YUV, unsigned char *RGB, int NumPixels)
Definition util.cc:133
SIMD_INLINE __m256i Load< true >(const __m256i *p)
Definition util.h:336
const __m256i U_SHUFFLE0
Definition util.h:221
SIMD_INLINE bool Aligned(const void *ptr, size_t align=sizeof(__m256))
Definition util.h:344
class register implement
Definition arena_queue.h:37