Apollo 10.0
自动驾驶开放平台
image_data_operations_rpp.h
浏览该文件的文档.
1/******************************************************************************
2 * Copyright 2022 The Apollo Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the License);
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an AS IS BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *****************************************************************************/
16#pragma once
17
18namespace apollo {
19namespace perception {
20namespace camera {
21
22const uint32_t THREADS_PER_BLOCK_X = 32;
23const uint32_t THREADS_PER_BLOCK_Y = 32;
24
25template <typename T>
26struct image2D {
27 unsigned char *data;
28 size_t width_step;
29 __device__ image2D(const T *data_pointer, size_t width_step)
31 data = reinterpret_cast<unsigned char *>(const_cast<T *>(data_pointer));
32 }
33 inline __device__ T &operator()(const size_t i, const size_t j) {
34 return *(reinterpret_cast<T *>(data + width_step * j + i * sizeof(T)));
35 }
36 inline __device__ const T &operator()(const size_t i, const size_t j) const {
37 return *(reinterpret_cast<T *>(data + width_step * j + i * sizeof(T)));
38 }
39};
40
41bool rppInitDescriptor(RpptDescPtr &descPtr, int width, int height,
42 int channels, int width_step) {
43 descPtr->dataType = RpptDataType::U8;
44 descPtr->numDims = 4;
45 descPtr->offsetInBytes = 0;
46 descPtr->n = 1;
47 descPtr->h = height;
48 descPtr->w = width;
49 descPtr->c = channels;
50 switch (channels) {
51 case 1:
52 descPtr->layout = RpptLayout::NCHW;
53 descPtr->strides.wStride = 1;
54 descPtr->strides.hStride = width_step;
55 descPtr->strides.cStride = descPtr->strides.hStride * descPtr->h;
56 descPtr->strides.nStride =
57 descPtr->strides.hStride * descPtr->h * descPtr->c;
58 break;
59 case 3:
60 descPtr->layout = RpptLayout::NHWC;
61 descPtr->strides.cStride = 1;
62 descPtr->strides.wStride = descPtr->c;
63 descPtr->strides.hStride = width_step;
64 descPtr->strides.nStride = descPtr->strides.hStride * descPtr->h;
65 break;
66 default:
67 AERROR << "Invalid number of channels: " << channels
68 << "; only 1 and 3 are supported.";
69 return false;
70 }
71 return true;
72}
73
75 RpptDesc srcDesc, dstDesc;
76 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
77
78 blob->Reshape({1, image.rows(), image.cols(), image.channels()});
79
80 if (!rppInitDescriptor(srcDescPtr, image.cols(), image.rows(),
81 image.channels(), image.width_step()))
82 return false;
83 if (!rppInitDescriptor(dstDescPtr, image.cols(), image.rows(), image.channels(),
84 blob->count(2) * static_cast<int>(sizeof(uint8_t))))
85 return false;
86
87 rppHandle_t handle;
88 rppCreateWithBatchSize(&handle, 1);
89 RppStatus status =
90 rppt_copy_gpu((const_cast<base::Image8U &>(image)).mutable_gpu_data(),
91 srcDescPtr, blob->mutable_gpu_data(), dstDescPtr, handle);
92 if (status != RPP_SUCCESS)
93 return false;
94 return true;
95}
96
98 const int src_width, const int src_height,
99 const float coeffs[3]) {
100 RppStatus status = RPP_SUCCESS;
101 RpptDesc srcDesc, dstDesc;
102 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
103
104 if (!rppInitDescriptor(srcDescPtr, src_width, src_height, 3,
105 src->width_step()))
106 return false;
107 if (!rppInitDescriptor(dstDescPtr, src_width, src_height, 1,
108 dst->width_step()))
109 return false;
110
111 rppHandle_t handle;
112 rppCreateWithBatchSize(&handle, 1);
113 assert((coeffs[1] == 0.587f) &&
114 ((coeffs[0] == 0.114f && coeffs[2] == 0.299f) ||
115 (coeffs[0] == 0.299f && coeffs[2] == 0.114f)) &&
116 "coefficients in rppt_color_to_greyscale_gpu are hardcoded");
117 // BGR: float coeffs[] = {0.114f, 0.587f, 0.299f};
118 if (coeffs[0] == 0.114f && coeffs[1] == 0.587f && coeffs[2] == 0.299f) {
119 status = rppt_color_to_greyscale_gpu(src->mutable_gpu_data(), srcDescPtr,
120 dst->mutable_gpu_data(), dstDescPtr,
121 RpptSubpixelLayout::BGRtype, handle);
122 }
123 // RGB: float coeffs[] = {0.299f, 0.587f, 0.114f};
124 if (coeffs[0] == 0.299f && coeffs[1] == 0.587f && coeffs[2] == 0.114f) {
125 status = rppt_color_to_greyscale_gpu(src->mutable_gpu_data(), srcDescPtr,
126 dst->mutable_gpu_data(), dstDescPtr,
127 RpptSubpixelLayout::RGBtype, handle);
128 }
129 if (status != RPP_SUCCESS)
130 return false;
131 return true;
132}
133
135 const base::Image8UPtr &dst,
136 const int src_width, const int src_height,
137 const int order[3]) {
138 RpptDesc srcDesc, dstDesc;
139 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
140
141 if (!rppInitDescriptor(srcDescPtr, src_width, src_height, 3,
142 src->width_step()))
143 return false;
144 if (!rppInitDescriptor(dstDescPtr, src_width, src_height, 3,
145 dst->width_step()))
146 return false;
147
148 rppHandle_t handle;
149 rppCreateWithBatchSize(&handle, 1);
150 assert(order[0] == 2 && order[1] == 1 && order[2] == 0 &&
151 "The order in rppt_swap_channels is hardcoded");
152 RppStatus status =
153 rppt_swap_channels_gpu(src->mutable_gpu_data(), srcDescPtr,
154 dst->mutable_gpu_data(), dstDescPtr, handle);
155 if (status != RPP_SUCCESS)
156 return false;
157 return true;
158}
159
160__global__ void duplicate_kernel(const unsigned char *src,
161 size_t src_width_step, uchar3 *dst,
162 size_t dst_width_step, int width, int height) {
163 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
164 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
165 image2D<unsigned char> src_img{src, src_width_step};
166 image2D<uchar3> dst_img{dst, dst_width_step};
167
168 if (i < width && j < height) {
169 unsigned char value = src_img(i, j);
170 dst_img(i, j).x = value;
171 dst_img(i, j).y = value;
172 dst_img(i, j).z = value;
173 }
174}
175
177 const base::Image8UPtr &dst,
178 const int src_width, const int src_height) {
179 dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y);
180 dim3 blocks((src_width + threadsPerBlock.x - 1) / threadsPerBlock.x,
181 (src_height + threadsPerBlock.y - 1) / threadsPerBlock.y);
182
183 hipLaunchKernelGGL(duplicate_kernel, blocks, threadsPerBlock, 0, 0,
184 src->gpu_data(), src->width_step(),
185 reinterpret_cast<uchar3 *>(dst->mutable_gpu_data()),
186 dst->width_step(), src_width, src_height);
187
188 if (hipSuccess != hipGetLastError())
189 return false;
190 return true;
191}
192
193__global__ void remap_pln1_kernel(const unsigned char *src,
194 size_t src_width_step, unsigned char *dst,
195 size_t dst_width_step, const float *mapx,
196 const float *mapy, int width, int height) {
197 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
198 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
199 image2D<unsigned char> src_img{src, src_width_step};
200 image2D<unsigned char> dst_img{dst, dst_width_step};
201 if (i < width && j < height) {
202 float x_coor = mapx[j * width + i];
203 float y_coor = mapy[j * width + i];
204
205 int X = trunc(x_coor);
206 int Y = trunc(y_coor);
207 float x_frac = x_coor - X;
208 float y_frac = y_coor - Y;
209
210 if (0 <= X && X < width && 0 <= Y && Y < height) {
211 // uchar p[2][2];
212 int X1 = (X < width - 1) ? X + 1 : X;
213 int Y1 = (Y < height - 1) ? Y + 1 : Y;
214
215 unsigned char pixel00 = src_img(X, Y);
216 unsigned char pixel01 = src_img(X1, Y);
217 unsigned char pixel10 = src_img(X, Y1);
218 unsigned char pixel11 = src_img(X1, Y1);
219 // bilinear interpolation
220 unsigned char interpolated =
221 (pixel00 * (1 - x_frac) + pixel01 * x_frac) * (1 - y_frac) +
222 (pixel10 * (1 - x_frac) + pixel11 * x_frac) * y_frac;
223 dst_img(i, j) = interpolated;
224 }
225 }
226}
227
228__global__ void remap_pkd3_kernel(const uchar3 *src, size_t src_width_step,
229 uchar3 *dst, size_t dst_width_step,
230 const float *mapx, const float *mapy,
231 int width, int height) {
232 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
233 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
234
235 image2D<uchar3> src_img{src, src_width_step};
236 image2D<uchar3> dst_img{dst, dst_width_step};
237
238 if (i < width && j < height) {
239 float x_coor = mapx[j * width + i];
240 float y_coor = mapy[j * width + i];
241
242 int X = trunc(x_coor);
243 int Y = trunc(y_coor);
244 float x_frac = x_coor - X;
245 float y_frac = y_coor - Y;
246
247 if (0 <= X && X < width && 0 <= Y && Y < height) {
248 // uchar3 p[2][2];
249 int X1 = (X < width - 1) ? X + 1 : X;
250 int Y1 = (Y < height - 1) ? Y + 1 : Y;
251
252 uchar3 pixel00 = src_img(X, Y);
253 uchar3 pixel01 = src_img(X1, Y);
254 uchar3 pixel10 = src_img(X, Y1);
255 uchar3 pixel11 = src_img(X1, Y1);
256 // bilinear interpolation
257 uchar3 interpolated;
258 interpolated.x =
259 (pixel00.x * (1 - x_frac) + pixel01.x * x_frac) * (1 - y_frac) +
260 (pixel10.x * (1 - x_frac) + pixel11.x * x_frac) * y_frac;
261 interpolated.y =
262 (pixel00.y * (1 - x_frac) + pixel01.y * x_frac) * (1 - y_frac) +
263 (pixel10.y * (1 - x_frac) + pixel11.y * x_frac) * y_frac;
264 interpolated.z =
265 (pixel00.z * (1 - x_frac) + pixel01.z * x_frac) * (1 - y_frac) +
266 (pixel10.z * (1 - x_frac) + pixel11.z * x_frac) * y_frac;
267
268 dst_img(i, j) = interpolated;
269 }
270 }
271}
272
273bool rppImageRemap(const base::Image8U &src_img, base::Image8U *dst_img,
274 const int src_width, const int src_height,
275 const base::Blob<float> &map_x,
276 const base::Blob<float> &map_y) {
277 dim3 threadsPerBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y);
278 dim3 blocks((src_width + threadsPerBlock.x - 1) / threadsPerBlock.x,
279 (src_height + threadsPerBlock.y - 1) / threadsPerBlock.y);
280
281 switch (src_img.channels()) {
282 case 1:
283 hipLaunchKernelGGL(remap_pln1_kernel, blocks, threadsPerBlock, 0, 0,
284 src_img.gpu_data(), src_img.width_step(),
285 dst_img->mutable_gpu_data(), dst_img->width_step(),
286 map_x.gpu_data(), map_y.gpu_data(), src_width,
287 src_height);
288 break;
289 case 3:
290 hipLaunchKernelGGL(
291 remap_pkd3_kernel, blocks, threadsPerBlock, 0, 0,
292 reinterpret_cast<const uchar3 *>(src_img.gpu_data()),
293 src_img.width_step(),
294 reinterpret_cast<uchar3 *>(dst_img->mutable_gpu_data()),
295 dst_img->width_step(), map_x.gpu_data(), map_y.gpu_data(), src_width,
296 src_height);
297 break;
298 default:
299 AERROR << "Invalid number of channels: " << src_img.channels()
300 << "; only 1 and 3 are supported.";
301 return false;
302 }
303 if (hipSuccess != hipGetLastError())
304 return false;
305 return true;
306}
307
308} // namespace camera
309} // namespace perception
310} // namespace apollo
A wrapper around SyncedMemory holders serving as the basic computational unit for images,...
Definition blob.h:88
const Dtype * gpu_data() const
Definition blob.cc:154
void Reshape(const int num, const int channels, const int height, const int width)
Deprecated; use Reshape(const std::vector<int>& shape).
Definition blob.cc:72
A wrapper around Blob holders serving as the basic computational unit for images.
Definition image_8u.h:44
const uint8_t * gpu_data() const
Definition image_8u.h:101
#define AERROR
Definition log.h:44
std::shared_ptr< Image8U > Image8UPtr
Definition image_8u.h:148
__global__ void duplicate_kernel(const unsigned char *src, size_t src_width_step, uchar3 *dst, size_t dst_width_step, int width, int height)
__global__ void remap_pln1_kernel(const unsigned char *src, size_t src_width_step, unsigned char *dst, size_t dst_width_step, const float *mapx, const float *mapy, int width, int height)
bool rppInitDescriptor(RpptDescPtr &descPtr, int width, int height, int channels, int width_step)
bool rppImageToGray(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height, const float coeffs[3])
__global__ void remap_pkd3_kernel(const uchar3 *src, size_t src_width_step, uchar3 *dst, size_t dst_width_step, const float *mapx, const float *mapy, int width, int height)
bool rppSwapImageChannels(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height, const int order[3])
bool rppImageToBlob(const base::Image8U &image, base::Blob< uint8_t > *blob)
bool rppDupImageChannels(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height)
bool rppImageRemap(const base::Image8U &src_img, base::Image8U *dst_img, const int src_width, const int src_height, const base::Blob< float > &map_x, const base::Blob< float > &map_y)
class register implement
Definition arena_queue.h:37
__device__ image2D(const T *data_pointer, size_t width_step)
__device__ const T & operator()(const size_t i, const size_t j) const
__device__ T & operator()(const size_t i, const size_t j)