Apollo 10.0
自动驾驶开放平台
direction_detection.cc
浏览该文件的文档.
1/******************************************************************************
2 * Copyright 2020 The Apollo Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *****************************************************************************/
16
18
19#include "yaml-cpp/yaml.h"
20
21#include "cyber/common/file.h"
22#include "cyber/common/log.h"
24
25#if __has_include("torch/version.h")
26#include "torch/version.h"
27#endif
28
29namespace apollo {
30namespace audio {
31
33using torch::indexing::None;
34using torch::indexing::Slice;
35
37
39
40std::pair<Point3D, double> DirectionDetection::EstimateSoundSource(
41 std::vector<std::vector<double>>&& channels_vec,
42 const std::string& respeaker_extrinsic_file, const int sample_rate,
43 const double mic_distance) {
44 if (!respeaker2imu_ptr_.get()) {
45 respeaker2imu_ptr_.reset(new Eigen::Matrix4d);
46 LoadExtrinsics(respeaker_extrinsic_file, respeaker2imu_ptr_.get());
47 }
48 double degree =
49 EstimateDirection(move(channels_vec), sample_rate, mic_distance);
50 Eigen::Vector4d source_position(kDistance * sin(degree),
51 kDistance * cos(degree), 0, 1);
52 source_position = (*respeaker2imu_ptr_) * source_position;
53
54 Point3D source_position_p3d;
55 source_position_p3d.set_x(source_position[0]);
56 source_position_p3d.set_y(source_position[1]);
57 source_position_p3d.set_z(source_position[2]);
58 degree = NormalizeAngle(degree);
59 return {source_position_p3d, degree};
60}
61
62double DirectionDetection::EstimateDirection(
63 std::vector<std::vector<double>>&& channels_vec, const int sample_rate,
64 const double mic_distance) {
65 std::vector<torch::Tensor> channels_ts;
66 auto options = torch::TensorOptions().dtype(torch::kFloat64);
67 int size = static_cast<int>(channels_vec[0].size());
68 for (auto& signal : channels_vec) {
69 channels_ts.push_back(torch::from_blob(signal.data(), {size}, options));
70 }
71
72 double tau0, tau1;
73 double theta0, theta1;
74 const double max_tau = mic_distance / kSoundSpeed;
75 tau0 = GccPhat(channels_ts[0], channels_ts[2], sample_rate, max_tau, 1);
76 theta0 = asin(tau0 / max_tau) * 180 / M_PI;
77 tau1 = GccPhat(channels_ts[1], channels_ts[3], sample_rate, max_tau, 1);
78 theta1 = asin(tau1 / max_tau) * 180 / M_PI;
79
80 int best_guess = 0;
81 if (fabs(theta0) < fabs(theta1)) {
82 best_guess = theta1 > 0 ? std::fmod(theta0 + 360, 360) : (180 - theta0);
83 } else {
84 best_guess = theta0 < 0 ? std::fmod(theta1 + 360, 360) : (180 - theta1);
85 best_guess = (best_guess + 90 + 180) % 360;
86 }
87 best_guess = (-best_guess + 480) % 360;
88
89 return static_cast<double>(best_guess) / 180 * M_PI;
90}
91
92bool DirectionDetection::LoadExtrinsics(const std::string& yaml_file,
93 Eigen::Matrix4d* respeaker_extrinsic) {
94 if (!apollo::cyber::common::PathExists(yaml_file)) {
95 AINFO << yaml_file << " does not exist!";
96 return false;
97 }
98 YAML::Node node = YAML::LoadFile(yaml_file);
99 double qw = 0.0;
100 double qx = 0.0;
101 double qy = 0.0;
102 double qz = 0.0;
103 double tx = 0.0;
104 double ty = 0.0;
105 double tz = 0.0;
106 try {
107 if (node.IsNull()) {
108 AINFO << "Load " << yaml_file << " failed! please check!";
109 return false;
110 }
111 qw = node["transform"]["rotation"]["w"].as<double>();
112 qx = node["transform"]["rotation"]["x"].as<double>();
113 qy = node["transform"]["rotation"]["y"].as<double>();
114 qz = node["transform"]["rotation"]["z"].as<double>();
115 tx = node["transform"]["translation"]["x"].as<double>();
116 ty = node["transform"]["translation"]["y"].as<double>();
117 tz = node["transform"]["translation"]["z"].as<double>();
118 } catch (YAML::Exception& e) {
119 AERROR << "load camera extrinsic file " << yaml_file
120 << " with error, YAML exception:" << e.what();
121 return false;
122 }
123 respeaker_extrinsic->setConstant(0);
124 Eigen::Quaterniond q;
125 q.x() = qx;
126 q.y() = qy;
127 q.z() = qz;
128 q.w() = qw;
129 (*respeaker_extrinsic).block<3, 3>(0, 0) = q.normalized().toRotationMatrix();
130 (*respeaker_extrinsic)(0, 3) = tx;
131 (*respeaker_extrinsic)(1, 3) = ty;
132 (*respeaker_extrinsic)(2, 3) = tz;
133 (*respeaker_extrinsic)(3, 3) = 1;
134 return true;
135}
136
137double DirectionDetection::GccPhat(const torch::Tensor& sig,
138 const torch::Tensor& refsig, int fs,
139 double max_tau, int interp) {
140 const int n_sig = sig.size(0), n_refsig = refsig.size(0),
141 n = n_sig + n_refsig;
142 torch::Tensor psig = at::constant_pad_nd(sig, {0, n_refsig}, 0);
143 torch::Tensor prefsig = at::constant_pad_nd(refsig, {0, n_sig}, 0);
144#if TORCH_VERSION_MINOR <= 7
145 psig = at::rfft(psig, 1, false, true);
146 prefsig = at::rfft(prefsig, 1, false, true);
147#else
148 auto psig_complex = at::fft_rfft(psig, c10::nullopt, -1, c10::nullopt);
149 psig = at::stack({torch::real(psig_complex), torch::imag(psig_complex)}, -1);
150
151 auto prefsig_complex = at::fft_rfft(prefsig, c10::nullopt, -1, c10::nullopt);
152 prefsig = at::stack(
153 {torch::real(prefsig_complex), torch::imag(prefsig_complex)}, -1);
154#endif
155
156 ConjugateTensor(&prefsig);
157 torch::Tensor r = ComplexMultiply(psig, prefsig);
158#if TORCH_VERSION_MINOR <= 7
159 torch::Tensor cc =
160 at::irfft(r / ComplexAbsolute(r), 1, false, true, {interp * n});
161#else
162 auto irfft_input_transpose = at::transpose(r / ComplexAbsolute(r), 0, 1);
163 auto irfft_complex =
164 torch::complex(irfft_input_transpose[0], irfft_input_transpose[1]);
165 torch::Tensor cc =
166 torch::real(torch::fft::irfft(irfft_complex, n, -1, c10::nullopt));
167#endif
168 int max_shift = static_cast<int>(interp * n / 2);
169 if (max_tau != 0)
170 max_shift = std::min(static_cast<int>(interp * fs * max_tau), max_shift);
171
172 auto begin = cc.index({Slice(cc.size(0) - max_shift, None)});
173 auto end = cc.index({Slice(None, max_shift + 1)});
174 cc = at::cat({begin, end});
175 // find max cross correlation index
176 const int shift = at::argmax(at::abs(cc), 0).item<int>() - max_shift;
177 const double tau = shift / static_cast<double>(interp * fs);
178
179 return tau;
180}
181
182void DirectionDetection::ConjugateTensor(torch::Tensor* tensor) {
183 tensor->index_put_({"...", 1}, -tensor->index({"...", 1}));
184}
185
186torch::Tensor DirectionDetection::ComplexMultiply(const torch::Tensor& a,
187 const torch::Tensor& b) {
188 torch::Tensor real = a.index({"...", 0}) * b.index({"...", 0}) -
189 a.index({"...", 1}) * b.index({"...", 1});
190 torch::Tensor imag = a.index({"...", 0}) * b.index({"...", 1}) +
191 a.index({"...", 1}) * b.index({"...", 0});
192 return at::cat({real.reshape({-1, 1}), imag.reshape({-1, 1})}, 1);
193}
194
195torch::Tensor DirectionDetection::ComplexAbsolute(const torch::Tensor& tensor) {
196 torch::Tensor res = tensor * tensor;
197 res = at::sqrt(res.sum(1)).reshape({-1, 1});
198
199 return res;
200}
201
202} // namespace audio
203} // namespace apollo
std::pair< Point3D, double > EstimateSoundSource(std::vector< std::vector< double > > &&channels_vec, const std::string &respeaker_extrinsic_file, const int sample_rate, const double mic_distance)
#define AERROR
Definition log.h:44
#define AINFO
Definition log.h:42
Math-related util functions.
double NormalizeAngle(const double angle)
Normalize angle to [-PI, PI).
Definition math_utils.cc:53
bool PathExists(const std::string &path)
Check if the path exists.
Definition file.cc:195
class register implement
Definition arena_queue.h:37