#include <cuda_runtime.h>

#include <pcl/features/normal_3d.h>
#include <pcl/io/pcd_io.h>
#include <pcl/search/kdtree.h>
#include <pcl/search/search.h>
#include <pcl/segmentation/region_growing.h>
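// Fragments of the class-index-to-name lookup tables (kIndex2NuScenesName /
// kIndex2ApolloName) used to translate raw detection labels into readable names.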
    {0, "car"},        {1, "truck"},   {2, "construction_vehicle"},
    {3, "bus"},        {4, "trailer"}, {5, "barrier"},
    {6, "motorcycle"}, {7, "bicycle"}, {8, "pedestrian"},
    {4, "TrainedOthers"}};
void BEVFORMERObstacleDetector::InitImageSize(
    const bevformer::ModelParam &model_param) {
  auto resize = model_param.resize();
  if (resize.width() == 0 && resize.height() == 0) {
    // ... (fallback size computation elided)
  } else {
    width_ = resize.width();
    height_ = resize.height();
  }
  AINFO << "height=" << height_ << ", "
        << "width=" << width_;
}
bool BEVFORMERObstacleDetector::InitTypes(
    const bevformer::ModelParam &model_param) {
  for (const auto &class_name : model_param.info().class_names()) {
    if (kNuScenesName2SubTypeMap.find(class_name) ==
        kNuScenesName2SubTypeMap.end()) {  // lookup reconstructed from context
      AERROR << "Unsupported subtype type: " << class_name;
      return false;
    }
    // ... (subtype registration elided)
  }
  return true;
}
  BASE_GPU_CHECK(cudaSetDevice(gpu_id_));
  BASE_GPU_CHECK(cudaStreamCreate(&stream_));

  std::string config_file =
      GetConfigFile(options.config_path, options.config_file);  // reconstructed
  std::cout << " config file is " << config_file << std::endl;
  if (!GetProtoFromFile(config_file, &model_param_)) {  // reconstructed
    AERROR << "Read model param failed!";
    return false;
  }

  InitTypes(model_param_);
  InitImageSize(model_param_);

  const auto &normalize = model_param_.normalize();  // reconstructed
  mean_bgr_ = {normalize.mean()[0], normalize.mean()[1], normalize.mean()[2]};
  std_bgr_ = {normalize.std()[0], normalize.std()[1], normalize.std()[2]};
  const auto &model_info = model_param_.info();
  std::string model_path = GetModelPath(model_info.name());
  if (!InitNetwork(model_info, model_path)) {  // reconstructed from context
    AERROR << "Init network failed!";
    return false;
  }
  std::cout << "bevformer model init success from "
            << model_info.proto_file().file() << std::endl;
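  // The detection head emits one score/box tensor per decoder layer and only
  // the last layer's slice is decoded. With the scores blob flattened from
  // [num_layers, batch, num_queries, num_classes], e.g. 6 layers, 900 queries,
  // and 10 classes would put the last layer's class scores in the index range
  // [5*900*10, 6*900*10). The counts below are read from the model's declared
  // output shapes rather than hard-coded.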
  auto model_outputs = model_param_.info().outputs();
  num_decoder_layer_ = model_outputs[1].shape()[0];
  num_queries_ = model_outputs[1].shape()[2];
  num_classes_det_ = model_outputs[1].shape()[3];
  code_size_ = model_outputs[2].shape()[3];
  class_blob_start_index_ =
      (num_decoder_layer_ - 1) * num_queries_ * num_classes_det_;
  class_blob_end_index_ = num_decoder_layer_ * num_queries_ * num_classes_det_;
  box_blob_start_index_ =
      (num_decoder_layer_ - 1) * num_queries_ * code_size_;
  AINFO << "number of decoder layer: " << num_decoder_layer_
        << " number of query: " << num_queries_
        << " number of classes: " << num_classes_det_
        << " code size: " << code_size_
        << " class blob start index: " << class_blob_start_index_
        << " class blob end index: " << class_blob_end_index_
        << " box blob start index: " << box_blob_start_index_;
  num_voxels_ = model_outputs[occ_blob_index_].shape()[1];
  num_classes_occ_ = model_outputs[occ_blob_index_].shape()[2];
  AINFO << " number of occ voxels " << num_voxels_
        << " number of occ classes " << num_classes_occ_;
  occ_xmin_ = model_param_.occ_xmin();
  occ_xmax_ = model_param_.occ_xmax();
  occ_ymin_ = model_param_.occ_ymin();
  occ_ymax_ = model_param_.occ_ymax();
  occ_zmin_ = model_param_.occ_zmin();
  occ_zmax_ = model_param_.occ_zmax();

  occ_x_grid_ = (occ_xmax_ - occ_xmin_) / voxel_size_;
  occ_y_grid_ = (occ_ymax_ - occ_ymin_) / voxel_size_;
  occ_z_grid_ = (occ_zmax_ - occ_zmin_) / voxel_size_;
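  // prev_bev carries the BEV features of the previous frame (temporal fusion);
  // cache its flattened element count so it can be copied between iterations.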
  auto model_inputs = model_param_.info().inputs();
  auto prev_bev_shape = model_inputs[1].shape();
  for (auto size : prev_bev_shape) {
    prev_bev_size_ *= size;
  }
  use_prev_bev_ = new float(0.0);

  // ... (no_pad_image_width / no_pad_image_height setup elided)
  CHECK(no_pad_image_width <= model_inputs[0].shape()[3]);
  CHECK(no_pad_image_height <= model_inputs[0].shape()[4]);
  no_pad_image_shape_[0] = no_pad_image_width;
  no_pad_image_shape_[1] = no_pad_image_height;

  trans_wrapper_->Init("CAM_FRONT");
void BEVFORMERObstacleDetector::Mat2Vec(const cv::Mat &im, float *data) {
  int rh = im.rows;  // reconstructed: row/column counts used below
  int rw = im.cols;
  int rc = im.channels();
  for (int i = 0; i < rc; ++i) {
    cv::extractChannel(im, cv::Mat(rh, rw, CV_32FC1, data + i * rh * rw), i);
  }
}
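// ImagePreprocessGPU(): per camera, zero-pads the image to the padded input
// aspect, then resizes and normalizes on the GPU before packing all cameras
// into one input blob.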
bool BEVFORMERObstacleDetector::ImagePreprocessGPU(
    const CameraFrame *frame, base::BlobPtr<float> input_img_blob) {
  ACHECK(input_img_blob != nullptr);

  DataProvider::ImageOptions image_options;
  // ... (image options setup elided)
  std::vector<int> img_blob_shape(input_img_blob->shape().begin(),
                                  input_img_blob->shape().end());

  std::vector<int> value(1);
  auto resized_image = std::make_shared<Blob<float>>(value);
  resized_image->Reshape({img_blob_shape[0], img_blob_shape[2],
                          img_blob_shape[3], img_blob_shape[4]});

  for (int i = 0; i < img_blob_shape[1]; ++i) {
    base::Image8U image;  // reconstructed from context
    frame->data_provider[i]->GetImage(image_options, &image);
    // ... (scale and pad amounts elided)
    int roi_width = image.cols();
    int roi_height = image.rows();
    // ...
    bottom_pad = img_blob_shape[3] / scale - roi_height;
    // ...
    int dst_height = roi_height + bottom_pad + top_pad;
    int dst_width = roi_width + left_pad + right_pad;
    // Padded size must match the blob's spatial dims after scaling.
    CHECK(dst_height * scale == img_blob_shape[3]);
    CHECK(dst_width * scale == img_blob_shape[4]);

    base::Image8U padding_image_container_ =
        base::Image8U(dst_height, dst_width, base::Color::RGB);

    base::Image8U tmp_image =
        padding_image_container_(base::RectI(0, 0, dst_width, dst_height));

    inference::ImageZeroPadding(image, &tmp_image, roi_width, left_pad,
                                right_pad, top_pad, bottom_pad, 0, stream_,
                                false);  // final flag reconstructed

    inference::ResizeGPU(tmp_image, resized_image,
                         frame->data_provider[i]->src_width(), 0, mean_bgr_[0],
                         mean_bgr_[1], mean_bgr_[2], false, std_bgr_[0],
                         std_bgr_[1], std_bgr_[2]);

    // Both pointers live on the GPU, so this is a device-to-device copy into
    // camera i's slot of the packed multi-camera input blob.
    cudaMemcpy(input_img_blob->mutable_gpu_data() +
                   i * img_blob_shape[2] * img_blob_shape[3] * img_blob_shape[4],
               resized_image->gpu_data(),
               img_blob_shape[2] * img_blob_shape[3] * img_blob_shape[4] *
                   sizeof(float),
               cudaMemcpyDeviceToDevice);
  }
  return true;
}
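// ImagePreprocess(): CPU counterpart of the preprocessing above; resizes with
// OpenCV, pads rows up to a multiple of 32, normalizes, and packs the planes.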
bool BEVFORMERObstacleDetector::ImagePreprocess(  // signature reconstructed
    const CameraFrame *frame, base::BlobPtr<float> input_img_blob) {
  ACHECK(input_img_blob != nullptr);

  DataProvider::ImageOptions image_options;
  // ... (image options and camera_num setup elided)
  std::vector<float> images_data;
  input_img_blob->Reshape({1, camera_num, 3, 480, 800});
  float *img_data_ptr = input_img_blob->mutable_cpu_data();

  for (int i = 0; i < camera_num; ++i) {
    base::Image8U image;  // reconstructed from context
    frame->data_provider[i]->GetImage(image_options, &image);
    cv::Mat img(image.rows(), image.cols(), CV_8UC3, cv::Scalar(0, 0, 0));
    memcpy(img.data, image.cpu_data(), image.total() * sizeof(uint8_t));
    // ...
    cv::resize(img, img, cv::Size(width_, height_));

    // Round the row count up to the next multiple of 32 and paste the image
    // into a zero-filled canvas of that height.
    int bg_rows = 0;  // reconstructed from context
    if (std::ceil(img.rows / 32) * 32 < img.rows) {
      bg_rows = (std::ceil(img.rows / 32) + 1) * 32;
    } else {
      bg_rows = std::ceil(img.rows / 32) * 32;
    }
    cv::Mat out(bg_rows, img.cols, CV_8UC3, {0, 0, 0});
    img.copyTo(out(cv::Rect(0, 0, img.cols, img.rows)));

    common::Normalize normalize = model_param_.normalize();
    std::vector<float> mean{normalize.mean().begin(), normalize.mean().end()};
    std::vector<float> std{normalize.std().begin(), normalize.std().end()};
    std::reverse(mean.begin(), mean.end());
    std::reverse(std.begin(), std.end());
    // ... (per-pixel normalization elided)
    std::vector<float> image_data(1 * 3 * out.cols * out.rows, 0.0f);
    Mat2Vec(out, image_data.data());
    images_data.insert(images_data.end(), image_data.begin(), image_data.end());
  }
  memcpy(img_data_ptr, images_data.data(), images_data.size() * sizeof(float));
  return true;
}
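// ImageExtrinsicPreprocess(): scales the cached lidar-to-image projection
// matrices once to match the resized input, then feeds them to the network.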
bool BEVFORMERObstacleDetector::ImageExtrinsicPreprocess(
    const CameraFrame *frame, base::BlobPtr<float> input_blob_lidar2img) {
  if (!resize_flag_) {
    resized_lidar2img_.resize(96);
    for (int i = 0; i < 96; i++) {
      // The branch condition is elided in this excerpt; the assumption below
      // scales the pixel-coordinate rows of each 4x4 lidar2img matrix.
      if (i % 16 < 8) {  // assumption: first two rows of each matrix
        resized_lidar2img_.at(i) =
            frame->k_lidar2img.at(i) * model_param_.img_scale();
      } else {
        resized_lidar2img_.at(i) = frame->k_lidar2img.at(i);
      }
    }
    resize_flag_ = true;  // reconstructed: compute once, then reuse
  }
  memcpy(input_blob_lidar2img->mutable_cpu_data(), resized_lidar2img_.data(),
         resized_lidar2img_.size() * sizeof(float));
  return true;
}
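// FillCanBus(): builds BEVFormer's 18-element can_bus vector (translation,
// orientation, and yaw terms) from the ego pose; position and yaw are
// expressed as deltas from the previous frame when the previous BEV is reused.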
void BEVFORMERObstacleDetector::FillCanBus(const CameraFrame *frame,
                                           base::BlobPtr<float> can_bus_blob) {
  try {
    // ... (ego pose lookup at the frame timestamp elided)
  } catch (tf2::TransformException &ex) {
    AERROR << "can't find tf! " << query_time;
    // ...
  }
  // ...
  std::vector<double> current_can_bus;
  std::vector<double> input_can_bus;
  std::vector<float> input_can_bus_float;
  // ... (translation and rotation entries pushed first; elided)
  for (int ii = 0; ii < 9; ii++) {
    current_can_bus.push_back(0.0);
  }
  Eigen::Vector3d vec(1.0, 0.0, 0.0);
  vec = rotation.toRotationMatrix() * vec;
  double yaw = atan2(vec(1), vec(0));
  yaw = yaw / M_PI * 180;
  // ... (yaw normalization elided)
  current_can_bus.push_back(yaw / 180 * M_PI);
  current_can_bus.push_back(yaw);

  input_can_bus = current_can_bus;
  if (*use_prev_bev_) {
    input_can_bus[0] -= last_can_bus_[0];
    input_can_bus[1] -= last_can_bus_[1];
    input_can_bus[2] -= last_can_bus_[2];
    input_can_bus[17] -= last_can_bus_[17];
  } else {
    input_can_bus[0] = 0.0;
    input_can_bus[1] = 0.0;
    input_can_bus[2] = 0.0;
    input_can_bus[17] = 0.0;
  }
  last_can_bus_ = current_can_bus;
  for (size_t ii = 0; ii < input_can_bus.size(); ii++) {
    input_can_bus_float.push_back(static_cast<float>(input_can_bus.at(ii)));
  }
  // ... (sanity check on the ego translation elided)
  for (int ii = 0; ii < 3; ii++) {
    // ...
      AERROR << "warning, ego-car location is too large!";
    // ...
  }
  memcpy(can_bus_blob->mutable_cpu_data(), input_can_bus_float.data(),
         sizeof(float) * input_can_bus_float.size());
}
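// GetObjects(): decodes the last decoder layer's outputs. A flattened score
// index maps to (query, class) via division/modulo; the box code is laid out
// as [cx, cy, log w, log l, cz, log h, sin(rot), cos(rot), ...], so the
// dimensions are exp()'d and the heading comes from atan2.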
void BEVFORMERObstacleDetector::GetObjects(
    const CameraFrame *frame, const float *outputs_scores,
    const float *outputs_coords, std::vector<base::ObjectPtr> *objects) {
  ACHECK(objects != nullptr);
  int num_objects = 0;  // reconstructed from context
  // ... (score threshold setup elided)
  for (int i = class_blob_start_index_; i < class_blob_end_index_; i++) {
    int current_layer_index = i - class_blob_start_index_;
    float score = outputs_scores[i];
    if (score >= score_threshold) {
      num_objects++;  // reconstructed from context
      int label = current_layer_index % num_classes_det_;
      int box_index = current_layer_index / num_classes_det_;
      int current_layer_box_index =
          box_blob_start_index_ + box_index * code_size_;
      float cx = outputs_coords[current_layer_box_index];
      float cy = outputs_coords[current_layer_box_index + 1];
      float cz = outputs_coords[current_layer_box_index + 4];
      float w = exp(outputs_coords[current_layer_box_index + 2]);
      float l = exp(outputs_coords[current_layer_box_index + 3]);
      float h = exp(outputs_coords[current_layer_box_index + 5]);
      float rot = atan2(outputs_coords[current_layer_box_index + 6],
                        outputs_coords[current_layer_box_index + 7]);
      base::ObjectPtr obj = nullptr;  // reconstructed from context
      obj.reset(new base::Object);
      // ... (type and subtype assignment elided)
      obj->sub_type_probs.assign(
          static_cast<int>(base::ObjectSubType::MAX_OBJECT_TYPE),
          0);  // enum bound reconstructed
      obj->sub_type_probs[static_cast<int>(obj->sub_type)] = score;
      obj->type_probs.assign(
          static_cast<int>(base::ObjectType::MAX_OBJECT_TYPE), 0);
      obj->type_probs[static_cast<int>(obj->type)] = score;
      obj->confidence = score;
      // ... (size and center assignment elided)
      obj->theta = -rot - M_PI / 2;
      // Wrap theta into (-pi, pi].
      if (obj->theta > M_PI) {
        obj->theta = obj->theta - 2 * M_PI;
      }
      if (obj->theta < -M_PI) {
        obj->theta = obj->theta + 2 * M_PI;
      }
      objects->push_back(obj);
      AINFO << "timestamp " << std::to_string(frame->timestamp)
            << " type: " << label << " score: " << score << " center: " << cx
            << " " << cy << " " << cz << " lwh: " << l << " " << w << " "
            << h;  // (remaining fields elided)
    }
  }
  AINFO << " bevformer detect " << num_objects << " objects";
}
bool BEVFORMERObstacleDetector::GetOccResults(CameraFrame *frame,
                                              const float *outputs_occupancy) {
  if (frame == nullptr) {
    return false;  // reconstructed from context
  }
  occ_results_.clear();
  int valid_occ_point = 0;
  for (int voxel_index = 0; voxel_index < num_voxels_; voxel_index++) {
    float max_prob = 0.0;
    int class_pred = num_classes_occ_ + 1;  // sentinel: no class selected
    for (int class_index = 0; class_index < num_classes_occ_; class_index++) {
      float class_prob =
          outputs_occupancy[voxel_index * num_classes_occ_ + class_index];
      if (class_prob > max_prob) {
        max_prob = class_prob;
        class_pred = class_index;
      }
    }
    if (max_prob >= occ_threshold_) {
      occ_results_.push_back(std::make_pair(voxel_index, class_pred));
      valid_occ_point += 1;
    }
  }
  AINFO << "number of valid occ voxels: " << valid_occ_point;
  // ... (debug dump of voxel/class pairs; output path prefix elided)
  std::string out_file = std::to_string(frame->timestamp) + ".bin";
  std::ofstream out_stream;
  out_stream.open(out_file);
  for (size_t i = 0; i < occ_results_.size(); i++) {
    out_stream << occ_results_.at(i).first << " " << occ_results_.at(i).second
               << "\n";  // line terminator reconstructed
  }
  out_stream.close();  // reconstructed from context
  return true;
}
bool BEVFORMERObstacleDetector::Detect(CameraFrame *frame) {  // reconstructed
  if (frame == nullptr) {
    return false;
  }

  auto model_inputs = model_param_.info().inputs();
  auto input_blob_img = net_->get_blob(model_inputs[0].name());
  auto input_blob_prev_bev = net_->get_blob(model_inputs[1].name());
  auto input_blob_use_prev_bev = net_->get_blob(model_inputs[2].name());
  auto input_blob_can_bus = net_->get_blob(model_inputs[3].name());
  auto input_blob_lidar2img = net_->get_blob(model_inputs[4].name());
  auto input_blob_no_pad_image_shape = net_->get_blob(model_inputs[5].name());
  auto model_outputs = model_param_.info().outputs();

  memcpy(input_blob_no_pad_image_shape->mutable_cpu_data(),
         no_pad_image_shape_.data(),
         sizeof(float) * no_pad_image_shape_.size());
  ImageExtrinsicPreprocess(frame, input_blob_lidar2img);
  ImagePreprocessGPU(frame, input_blob_img);
  FillCanBus(frame, input_blob_can_bus);
  memcpy(input_blob_use_prev_bev->mutable_cpu_data(), use_prev_bev_,
         sizeof(float));  // size reconstructed from context
  if (*use_prev_bev_) {
    auto bev_embed = net_->get_blob(model_outputs[0].name());
    // Device-to-device copy of last frame's BEV features into this frame's
    // prev_bev input.
    cudaMemcpy(input_blob_prev_bev->mutable_gpu_data(), bev_embed->gpu_data(),
               prev_bev_size_ * sizeof(float), cudaMemcpyDeviceToDevice);
  }
  // ... (network inference elided)
  auto outputs_classes = net_->get_blob(model_outputs[1].name());
  auto outputs_coords = net_->get_blob(model_outputs[2].name());
  const float *outputs_classes_ptr = outputs_classes->cpu_data();
  const float *outputs_coords_ptr = outputs_coords->cpu_data();
  GetObjects(frame, outputs_classes_ptr, outputs_coords_ptr,
             &frame->detected_objects);  // target reconstructed from context
  // ...
  auto outputs_occupancy =
      net_->get_blob(model_outputs[occ_blob_index_].name());
  const float *outputs_occupancy_ptr = outputs_occupancy->cpu_data();
  GetOccResults(frame, outputs_occupancy_ptr);
  // ...
  *use_prev_bev_ = 1.0;
  return true;  // reconstructed from context
}
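// Nuscenes2Apollo(): converts detections from the NuScenes convention to
// Apollo's, filling direction vectors and camera-local centers.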
bool BEVFORMERObstacleDetector::Nuscenes2Apollo(
    std::vector<base::ObjectPtr> *objects) {
  ACHECK(objects != nullptr);
  for (auto &obj : *objects) {
    // ... (heading offset elided)
    obj->direction[0] = cosf(obj->theta);
    obj->direction[1] = sinf(obj->theta);
    obj->direction[2] = 0;
    // ... (type translation elided)
    obj->camera_supplement.local_center[0] = static_cast<float>(obj->center[0]);
    obj->camera_supplement.local_center[1] = static_cast<float>(obj->center[1]);
    obj->camera_supplement.local_center[2] = static_cast<float>(obj->center[2]);
  }
  return true;
}