2 năm trước cách đây · 25bbebf788
--- a/sherpa-ncnn/c-api/c-api.h
+++ b/sherpa-ncnn/c-api/c-api.h
@@ -149,8 +149,10 @@ void DestroyStream(SherpaNcnnStream *s);
 
				 /// Accept input audio samples and compute the features.
			
 
				 ///
			
 
				 /// @param s  A pointer returned by CreateStream().
			
 
				-/// @param sample_rate  Sampler rate of the input samples. It has to be 16 kHz
			
 
				-///                     for models from icefall.
			
 
				+/// @param sample_rate  Sample rate of the input samples. If it is different
			
 
				+///                     from feat_config.sampling_rate, the input is resampled.
			
 
				+///                     Caution: You MUST NOT use a different sampling_rate
			
 
				+///                     across different calls to AcceptWaveform()
			
 
				 /// @param samples A pointer to a 1-D array containing audio samples.
			
 
				 ///                The range of samples has to be normalized to [-1, 1].
			
 
				 /// @param n  Number of elements in the samples array.
			
--- a/sherpa-ncnn/csrc/features.cc
+++ b/sherpa-ncnn/csrc/features.cc
@@ -24,6 +24,7 @@
 
				 
			
 
				 #include "kaldi-native-fbank/csrc/online-feature.h"
			
 
				 #include "mat.h"  // NOLINT
			
 
				+#include "sherpa-ncnn/csrc/resample.h"
			
 
				 
			
 
				 namespace sherpa_ncnn {
			
 
				 
			
@@ -52,8 +53,47 @@ class FeatureExtractor::Impl {
 
				     fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
			
 
				   }
			
 
				 
			
 
				-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n) {
			
 
				+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
			
 
				     std::lock_guard<std::mutex> lock(mutex_);
			
 
				+    if (resampler_) {
			
 
				+      if (sampling_rate != resampler_->GetInputSamplingRate()) {
			
 
				+        NCNN_LOGE(
			
 
				+            "You changed the input sampling rate!! Expected: %d, given: "
			
 
				+            "%d",
			
 
				+            resampler_->GetInputSamplingRate(), sampling_rate);
			
 
				+        exit(-1);
			
 
				+      }
			
 
				+
			
 
				+      std::vector<float> samples;
			
 
				+      resampler_->Resample(waveform, n, false, &samples);
			
 
				+      fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
			
 
				+                             samples.size());
			
 
				+      return;
			
 
				+    }
			
 
				+
			
 
				+    if (sampling_rate != opts_.frame_opts.samp_freq) {
			
 
				+      NCNN_LOGE(
			
 
				+          "Creating a resampler:\n"
			
 
				+          "   in_sample_rate: %d\n"
			
 
				+          "   output_sample_rate: %d\n",
			
 
				+          sampling_rate, static_cast<int32_t>(opts_.frame_opts.samp_freq));
			
 
				+
			
 
				+      float min_freq =
			
 
				+          std::min<int32_t>(sampling_rate, opts_.frame_opts.samp_freq);
			
 
				+      float lowpass_cutoff = 0.99 * 0.5 * min_freq;
			
 
				+
			
 
				+      int32_t lowpass_filter_width = 6;
			
 
				+      resampler_ = std::make_unique<LinearResample>(
			
 
				+          sampling_rate, opts_.frame_opts.samp_freq, lowpass_cutoff,
			
 
				+          lowpass_filter_width);
			
 
				+
			
 
				+      std::vector<float> samples;
			
 
				+      resampler_->Resample(waveform, n, false, &samples);
			
 
				+      fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
			
 
				+                             samples.size());
			
 
				+      return;
			
 
				+    }
			
 
				+
			
 
				     fbank_->AcceptWaveform(sampling_rate, waveform, n);
			
 
				   }
			
 
				 
			
@@ -95,6 +135,7 @@ class FeatureExtractor::Impl {
 
				   std::unique_ptr<knf::OnlineFbank> fbank_;
			
 
				   knf::FbankOptions opts_;
			
 
				   mutable std::mutex mutex_;
			
 
				+  std::unique_ptr<LinearResample> resampler_;
			
 
				 };
			
 
				 
			
 
				 FeatureExtractor::FeatureExtractor(const FeatureExtractorConfig &config)
			
@@ -102,7 +143,7 @@ FeatureExtractor::FeatureExtractor(const FeatureExtractorConfig &config)
 
				 
			
 
				 FeatureExtractor::~FeatureExtractor() = default;
			
 
				 
			
 
				-void FeatureExtractor::AcceptWaveform(float sampling_rate,
			
 
				+void FeatureExtractor::AcceptWaveform(int32_t sampling_rate,
			
 
				                                       const float *waveform, int32_t n) {
			
 
				   impl_->AcceptWaveform(sampling_rate, waveform, n);
			
 
				 }
			
--- a/sherpa-ncnn/csrc/features.h
+++ b/sherpa-ncnn/csrc/features.h
@@ -29,7 +29,7 @@ class Mat;
 
				 namespace sherpa_ncnn {
			
 
				 
			
 
				 struct FeatureExtractorConfig {
			
 
				-  float sampling_rate = 16000;
			
 
				+  int32_t sampling_rate = 16000;
			
 
				   int32_t feature_dim = 80;
			
 
				 
			
 
				   // 100 frames per second
			
@@ -45,12 +45,15 @@ class FeatureExtractor {
 
				   ~FeatureExtractor();
			
 
				 
			
 
				   /**
			
 
				-     @param sampling_rate The sampling_rate of the input waveform. Should match
			
 
				-                          the one expected by the feature extractor.
			
 
				+     @param sampling_rate The sampling_rate of the input waveform. We will
			
 
				+                          resample it if it is different from
			
 
				+                          config.sampling_rate.
			
 
				+                          Caution: You MUST not use a different sampling rate
			
 
				+                          across different calls to AcceptWaveform().
			
 
				      @param waveform Pointer to a 1-D array of size n
			
 
				      @param n Number of entries in waveform
			
 
				    */
			
 
				-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
			
 
				+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n);
			
 
				 
			
 
				   // InputFinished() tells the class you won't be providing any
			
 
				   // more waveform.  This will help flush out the last frame or two
			
--- a/sherpa-ncnn/csrc/stream.cc
+++ b/sherpa-ncnn/csrc/stream.cc
@@ -25,7 +25,7 @@ class Stream::Impl {
 
				   explicit Impl(const FeatureExtractorConfig &config)
			
 
				       : feat_extractor_(config) {}
			
 
				 
			
 
				-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n) {
			
 
				+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
			
 
				     feat_extractor_.AcceptWaveform(sampling_rate, waveform, n);
			
 
				   }
			
 
				 
			
@@ -71,7 +71,7 @@ Stream::Stream(const FeatureExtractorConfig &config)
 
				 
			
 
				 Stream::~Stream() = default;
			
 
				 
			
 
				-void Stream::AcceptWaveform(float sampling_rate, const float *waveform,
			
 
				+void Stream::AcceptWaveform(int32_t sampling_rate, const float *waveform,
			
 
				                             int32_t n) {
			
 
				   impl_->AcceptWaveform(sampling_rate, waveform, n);
			
 
				 }
			
--- a/sherpa-ncnn/csrc/stream.h
+++ b/sherpa-ncnn/csrc/stream.h
@@ -32,12 +32,15 @@ class Stream {
 
				   ~Stream();
			
 
				 
			
 
				   /**
			
 
				-     @param sampling_rate The sampling_rate of the input waveform. Should match
			
 
				-                          the one expected by the feature extractor.
			
 
				+     @param sampling_rate The sampling_rate of the input waveform. We will
			
 
				+                          resample it if it is different from
			
 
				+                          config.sampling_rate.
			
 
				+                          Caution: You MUST not use a different sampling rate
			
 
				+                          across different calls to AcceptWaveform().
			
 
				      @param waveform Pointer to a 1-D array of size n
			
 
				      @param n Number of entries in waveform
			
 
				    */
			
 
				-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
			
 
				+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n);
			
 
				 
			
 
				   /**
			
 
				    * InputFinished() tells the class you won't be providing any
			
--- a/swift-api-examples/SherpaNcnn.swift
+++ b/swift-api-examples/SherpaNcnn.swift
@@ -188,9 +188,11 @@ class SherpaNcnnRecognizer {
 
				     ///
			
 
				     /// - Parameters:
			
 
				     ///   - samples: Audio samples normalized to the range [-1, 1]
			
 
				-    ///   - sampleRate: Sample rate of the input audio samples. Must match
			
 
				-    ///                 the one expected by the model. It must be 16000 for
			
 
				-    ///                 models from icefall.
			
 
				+    ///   - sampleRate: Sample rate of the input audio samples. If it is
			
 
				+    ///                 different from featConfig.sampleRate, the input is
			
 
				+    ///                 resampled. Caution: You cannot use a different
			
 
				+    ///                 sampleRate across different calls to
			
 
				+    ///                 AcceptWaveform().
			
 
				     func acceptWaveform(samples: [Float], sampleRate: Float = 16000) {
			
 
				         AcceptWaveform(stream, sampleRate, samples, Int32(samples.count))
			
 
				     }