// NOTE(review): fragment of the image2D<T> device-side constructor (original
// source line 31). This file is an extracted/garbled dump: the leading integers
// are the original file's line numbers and interior lines are missing.
// Visible behavior: the incoming (possibly const-qualified) element pointer is
// const_cast'ed and stored as a raw unsigned-char pointer so that byte-granular
// stride arithmetic can be done in operator() below.
31 data =
reinterpret_cast<unsigned char *
>(
const_cast<T *
>(data_pointer));
// Mutable element accessor (original line 33): returns a reference to the
// pixel at column i, row j. `width_step` is evidently the row pitch in BYTES
// (the offset is width_step * j + i * sizeof(T), computed on the byte pointer
// `data` before casting back to T*). The closing brace of this method lies on
// a source line missing from this extract.
33 inline __device__ T &
operator()(
const size_t i,
const size_t j) {
34 return *(
reinterpret_cast<T *
>(
data +
width_step * j + i *
sizeof(T)));
// Const element accessor (original line 36): identical byte-offset arithmetic
// to the mutable overload above (width_step * j + i * sizeof(T) on the raw
// byte pointer). NOTE(review): it casts to non-const T* even in the const
// overload — returning `const T &` keeps callers honest, but a
// reinterpret_cast<const T *> would be cleaner; verify against full source.
36 inline __device__
const T &
operator()(
const size_t i,
const size_t j)
const {
37 return *(
reinterpret_cast<T *
>(
data +
width_step * j + i *
sizeof(T)));
// Fragment of rppInitDescriptor(descPtr, width, height, channels, width_step)
// (original lines 42-68; several interior lines, including the branch
// conditions, are missing from this extract). Fills an RpptDesc for U8 data.
42 int channels,
int width_step) {
43 descPtr->dataType = RpptDataType::U8;
45 descPtr->offsetInBytes = 0;
49 descPtr->c = channels;
// First branch (presumably channels == 1 — condition line not visible):
// planar NCHW layout; hStride is the row pitch in bytes (width_step),
// wStride walks single bytes within a row.
52 descPtr->layout = RpptLayout::NCHW;
53 descPtr->strides.wStride = 1;
54 descPtr->strides.hStride = width_step;
55 descPtr->strides.cStride = descPtr->strides.hStride * descPtr->h;
56 descPtr->strides.nStride =
57 descPtr->strides.hStride * descPtr->h * descPtr->c;
// Second branch (presumably channels == 3): packed/interleaved NHWC layout;
// cStride 1 (channels adjacent), wStride jumps a whole pixel (c bytes).
60 descPtr->layout = RpptLayout::NHWC;
61 descPtr->strides.cStride = 1;
62 descPtr->strides.wStride = descPtr->c;
63 descPtr->strides.hStride = width_step;
64 descPtr->strides.nStride = descPtr->strides.hStride * descPtr->h;
// Fallback: any other channel count is rejected with an error log.
67 AERROR <<
"Invalid number of channels: " << channels
68 <<
"; only 1 and 3 are supported.";
// Fragment of rppImageToBlob(image, blob) (original lines 75-92; interior
// lines — presumably descriptor init, blob Reshape, and the status/error
// handling body — are missing from this extract). Copies an Image8U's GPU
// buffer into a Blob<uint8_t> via rppt_copy_gpu.
75 RpptDesc srcDesc, dstDesc;
76 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
// Sizing expression fragment: count(2) * sizeof(uint8_t) — presumably part
// of a Reshape/size check; surrounding call is on missing lines.
84 blob->
count(2) *
static_cast<int>(
sizeof(uint8_t))))
// RPP handle created with batch size 1 (single image per call).
88 rppCreateWithBatchSize(&handle, 1);
// const_cast is needed because mutable_gpu_data() is non-const while the
// image parameter is taken by const reference.
90 rppt_copy_gpu((
const_cast<base::Image8U &
>(image)).mutable_gpu_data(),
92 if (status != RPP_SUCCESS)
// Fragment of rppImageToGray(src, dst, src_width, src_height, coeffs)
// (original lines 98-129; interior lines missing). Converts a 3-channel GPU
// image to greyscale with rppt_color_to_greyscale_gpu. RPP hardcodes the
// BT.601-style weights, so the caller-supplied coeffs are only used to pick
// the subpixel order (BGR vs RGB) and are assert-checked against the
// hardcoded values 0.114/0.587/0.299.
98 const int src_width,
const int src_height,
99 const float coeffs[3]) {
100 RppStatus status = RPP_SUCCESS;
101 RpptDesc srcDesc, dstDesc;
102 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
112 rppCreateWithBatchSize(&handle, 1);
// The assert admits exactly the two supported orderings: (B,G,R) weights
// given as {0.114, 0.587, 0.299} or {0.299, 0.587, 0.114}.
113 assert((coeffs[1] == 0.587f) &&
114 ((coeffs[0] == 0.114f && coeffs[2] == 0.299f) ||
115 (coeffs[0] == 0.299f && coeffs[2] == 0.114f)) &&
116 "coefficients in rppt_color_to_greyscale_gpu are hardcoded");
// coeffs[0] == 0.114 => first channel weighted as blue => BGR layout.
118 if (coeffs[0] == 0.114f && coeffs[1] == 0.587f && coeffs[2] == 0.299f) {
119 status = rppt_color_to_greyscale_gpu(src->mutable_gpu_data(), srcDescPtr,
120 dst->mutable_gpu_data(), dstDescPtr,
121 RpptSubpixelLayout::BGRtype, handle);
// coeffs[0] == 0.299 => first channel weighted as red => RGB layout.
124 if (coeffs[0] == 0.299f && coeffs[1] == 0.587f && coeffs[2] == 0.114f) {
125 status = rppt_color_to_greyscale_gpu(src->mutable_gpu_data(), srcDescPtr,
126 dst->mutable_gpu_data(), dstDescPtr,
127 RpptSubpixelLayout::RGBtype, handle);
129 if (status != RPP_SUCCESS)
// Fragment of rppSwapImageChannels(src, dst, src_width, src_height, order)
// (original lines 136-155; interior lines missing). Swaps R<->B channels via
// rppt_swap_channels_gpu; only the order {2,1,0} is supported (asserted).
136 const int src_width,
const int src_height,
137 const int order[3]) {
138 RpptDesc srcDesc, dstDesc;
139 RpptDescPtr srcDescPtr = &srcDesc, dstDescPtr = &dstDesc;
149 rppCreateWithBatchSize(&handle, 1);
150 assert(order[0] == 2 && order[1] == 1 && order[2] == 0 &&
151 "The order in rppt_swap_channels is hardcoded");
// NOTE(review): on the visible lines the return value of
// rppt_swap_channels_gpu is NOT assigned to `status`, yet `status` is checked
// at line 155 below. Unless the assignment (or status declaration/init) sits
// on one of the missing lines (e.g. 152), the check tests a stale value —
// verify against the full source.
153 rppt_swap_channels_gpu(src->mutable_gpu_data(), srcDescPtr,
154 dst->mutable_gpu_data(), dstDescPtr, handle);
155 if (status != RPP_SUCCESS)
// Fragment of __global__ duplicate_kernel(src, src_width_step, dst,
// dst_width_step, width, height) — see the signature summary at the bottom of
// this extract. Replicates a single-channel image into all three channels of
// a packed uchar3 image. The src_img/dst_img image2D wrapper construction
// sits on missing lines (165-167).
161 size_t src_width_step, uchar3 *dst,
162 size_t dst_width_step,
int width,
int height) {
// Standard 2D global index; one thread per output pixel.
163 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
164 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
// Bounds guard: grid may overhang the image at the right/bottom edges.
168 if (i < width && j < height) {
169 unsigned char value = src_img(i, j);
170 dst_img(i, j).x = value;
171 dst_img(i, j).y = value;
172 dst_img(i, j).z = value;
// Fragment of rppDupImageChannels(src, dst, src_width, src_height) (original
// lines 178-188; the kernel-launch line(s) around 182-183 are missing).
// Host wrapper that launches duplicate_kernel with a ceil-div 2D grid.
178 const int src_width,
const int src_height) {
// Ceil-division so partial edge tiles are still covered (guarded in-kernel).
180 dim3 blocks((src_width + threadsPerBlock.x - 1) / threadsPerBlock.x,
181 (src_height + threadsPerBlock.y - 1) / threadsPerBlock.y);
// Kernel arguments (the <<<...>>>/hipLaunchKernelGGL line is on a missing
// line): src pitch-ed u8 plane in, packed uchar3 out.
184 src->gpu_data(), src->width_step(),
185 reinterpret_cast<uchar3 *
>(dst->mutable_gpu_data()),
186 dst->width_step(), src_width, src_height);
// Launch-error check; hipGetLastError catches bad-config launch failures.
188 if (hipSuccess != hipGetLastError())
// Fragment of __global__ remap_pln1_kernel(src, src_width_step, dst,
// dst_width_step, mapx, mapy, width, height) — bilinear remap of a
// single-channel (planar, PLN1) u8 image. For each destination pixel (i, j)
// the source coordinate is looked up in mapx/mapy and the output is the
// bilinear interpolation of the four surrounding source pixels.
// src_img/dst_img wrapper construction sits on missing lines (199-200).
194 size_t src_width_step,
unsigned char *dst,
195 size_t dst_width_step,
const float *mapx,
196 const float *mapy,
int width,
int height) {
197 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
198 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
// Guard against grid overhang.
201 if (i < width && j < height) {
// mapx/mapy are dense width*height lookup tables of source coordinates.
202 float x_coor = mapx[j * width + i];
203 float y_coor = mapy[j * width + i];
// Integer base coordinate plus fractional weights for interpolation.
// NOTE(review): trunc() here is the double overload (x_coor promoted);
// truncf would avoid the double round-trip — behavior is the same.
205 int X = trunc(x_coor);
206 int Y = trunc(y_coor);
207 float x_frac = x_coor - X;
208 float y_frac = y_coor - Y;
// Only remap pixels whose base source coordinate is inside the image;
// out-of-range destinations are left untouched.
210 if (0 <= X && X < width && 0 <= Y && Y < height) {
// Clamp the +1 neighbors at the right/bottom border.
212 int X1 = (X < width - 1) ? X + 1 : X;
213 int Y1 = (Y < height - 1) ? Y + 1 : Y;
215 unsigned char pixel00 = src_img(X, Y);
216 unsigned char pixel01 = src_img(X1, Y);
217 unsigned char pixel10 = src_img(X, Y1);
218 unsigned char pixel11 = src_img(X1, Y1);
// Standard bilinear blend; float math truncated back to u8 on store.
220 unsigned char interpolated =
221 (pixel00 * (1 - x_frac) + pixel01 * x_frac) * (1 - y_frac) +
222 (pixel10 * (1 - x_frac) + pixel11 * x_frac) * y_frac;
223 dst_img(i, j) = interpolated;
// Fragment of __global__ remap_pkd3_kernel(src, src_width_step, dst,
// dst_width_step, mapx, mapy, width, height) — the packed 3-channel (PKD3,
// uchar3) twin of remap_pln1_kernel above: identical coordinate lookup,
// border clamping, and bilinear weights, applied independently to the
// x/y/z channels. The first parameter and the src_img/dst_img wrapper
// construction sit on missing lines.
229 uchar3 *dst,
size_t dst_width_step,
230 const float *mapx,
const float *mapy,
231 int width,
int height) {
232 const size_t i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
233 const size_t j = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
238 if (i < width && j < height) {
239 float x_coor = mapx[j * width + i];
240 float y_coor = mapy[j * width + i];
242 int X = trunc(x_coor);
243 int Y = trunc(y_coor);
244 float x_frac = x_coor - X;
245 float y_frac = y_coor - Y;
247 if (0 <= X && X < width && 0 <= Y && Y < height) {
249 int X1 = (X < width - 1) ? X + 1 : X;
250 int Y1 = (Y < height - 1) ? Y + 1 : Y;
252 uchar3 pixel00 = src_img(X, Y);
253 uchar3 pixel01 = src_img(X1, Y);
254 uchar3 pixel10 = src_img(X, Y1);
255 uchar3 pixel11 = src_img(X1, Y1);
// Per-channel bilinear blend; the `interpolated.x/.y/.z =` left-hand
// sides are on missing lines (258/261/264).
259 (pixel00.x * (1 - x_frac) + pixel01.x * x_frac) * (1 - y_frac) +
260 (pixel10.x * (1 - x_frac) + pixel11.x * x_frac) * y_frac;
262 (pixel00.y * (1 - x_frac) + pixel01.y * x_frac) * (1 - y_frac) +
263 (pixel10.y * (1 - x_frac) + pixel11.y * x_frac) * y_frac;
265 (pixel00.z * (1 - x_frac) + pixel01.z * x_frac) * (1 - y_frac) +
266 (pixel10.z * (1 - x_frac) + pixel11.z * x_frac) * y_frac;
268 dst_img(i, j) = interpolated;
// Fragment of rppImageRemap(src_img, dst_img, src_width, src_height, map_x,
// map_y) (original lines 274-303; most interior lines — channel dispatch and
// the kernel launches — are missing). Host wrapper that picks
// remap_pln1_kernel or remap_pkd3_kernel based on the image's channel count.
274 const int src_width,
const int src_height,
// Same ceil-div grid sizing as the other launch wrappers in this file.
278 dim3 blocks((src_width + threadsPerBlock.x - 1) / threadsPerBlock.x,
279 (src_height + threadsPerBlock.y - 1) / threadsPerBlock.y);
// Argument fragment from the PKD3 launch (line 292): packed uchar3 source.
292 reinterpret_cast<const uchar3 *
>(src_img.
gpu_data()),
// Tail of the unsupported-channel-count error log (mirrors
// rppInitDescriptor's message).
300 <<
"; only 1 and 3 are supported.";
303 if (hipSuccess != hipGetLastError())
A wrapper around SyncedMemory holders serving as the basic computational unit for images,...
const Dtype * gpu_data() const
Dtype * mutable_gpu_data()
void Reshape(const int num, const int channels, const int height, const int width)
Deprecated; use Reshape(const std::vector<int>& shape).
A wrapper around Blob holders serving as the basic computational unit for images.
uint8_t * mutable_gpu_data()
const uint8_t * gpu_data() const
std::shared_ptr< Image8U > Image8UPtr
__global__ void duplicate_kernel(const unsigned char *src, size_t src_width_step, uchar3 *dst, size_t dst_width_step, int width, int height)
__global__ void remap_pln1_kernel(const unsigned char *src, size_t src_width_step, unsigned char *dst, size_t dst_width_step, const float *mapx, const float *mapy, int width, int height)
bool rppInitDescriptor(RpptDescPtr &descPtr, int width, int height, int channels, int width_step)
bool rppImageToGray(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height, const float coeffs[3])
const uint32_t THREADS_PER_BLOCK_X
__global__ void remap_pkd3_kernel(const uchar3 *src, size_t src_width_step, uchar3 *dst, size_t dst_width_step, const float *mapx, const float *mapy, int width, int height)
bool rppSwapImageChannels(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height, const int order[3])
bool rppImageToBlob(const base::Image8U &image, base::Blob< uint8_t > *blob)
const uint32_t THREADS_PER_BLOCK_Y
bool rppDupImageChannels(const base::Image8UPtr &src, const base::Image8UPtr &dst, const int src_width, const int src_height)
bool rppImageRemap(const base::Image8U &src_img, base::Image8U *dst_img, const int src_width, const int src_height, const base::Blob< float > &map_x, const base::Blob< float > &map_y)
__device__ image2D(const T *data_pointer, size_t width_step)
__device__ const T & operator()(const size_t i, const size_t j) const
__device__ T & operator()(const size_t i, const size_t j)