Browse Source

Support resampling (#135)

Fangjun Kuang 2 năm trước cách đây
mục cha
commit
25bbebf788

+ 4 - 2
sherpa-ncnn/c-api/c-api.h

@@ -149,8 +149,10 @@ void DestroyStream(SherpaNcnnStream *s);
 /// Accept input audio samples and compute the features.
 ///
 /// @param s  A pointer returned by CreateStream().
-/// @param sample_rate  Sampler rate of the input samples. It has to be 16 kHz
-///                     for models from icefall.
+/// @param sample_rate  Sample rate of the input samples. If it differs from
+///                     feat_config.sampling_rate, the samples are resampled.
+///                     Caution: You MUST NOT use a different sample_rate
+///                     across different calls to AcceptWaveform().
 /// @param samples A pointer to a 1-D array containing audio samples.
 ///                The range of samples has to be normalized to [-1, 1].
 /// @param n  Number of elements in the samples array.

+ 43 - 2
sherpa-ncnn/csrc/features.cc

@@ -24,6 +24,7 @@
 
 #include "kaldi-native-fbank/csrc/online-feature.h"
 #include "mat.h"  // NOLINT
+#include "sherpa-ncnn/csrc/resample.h"
 
 namespace sherpa_ncnn {
 
@@ -52,8 +53,47 @@ class FeatureExtractor::Impl {
     fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
   }
 
-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n) {
+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
     std::lock_guard<std::mutex> lock(mutex_);
+    if (resampler_) {
+      if (sampling_rate != resampler_->GetInputSamplingRate()) {
+        NCNN_LOGE(
+            "You changed the input sampling rate!! Expected: %d, given: "
+            "%d",
+            resampler_->GetInputSamplingRate(), sampling_rate);
+        exit(-1);
+      }
+
+      std::vector<float> samples;
+      resampler_->Resample(waveform, n, false, &samples);
+      fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
+                             samples.size());
+      return;
+    }
+
+    if (sampling_rate != opts_.frame_opts.samp_freq) {
+      NCNN_LOGE(
+          "Creating a resampler:\n"
+          "   in_sample_rate: %d\n"
+          "   output_sample_rate: %d\n",
+          sampling_rate, static_cast<int32_t>(opts_.frame_opts.samp_freq));
+
+      float min_freq =
+          std::min<int32_t>(sampling_rate, opts_.frame_opts.samp_freq);
+      float lowpass_cutoff = 0.99 * 0.5 * min_freq;
+
+      int32_t lowpass_filter_width = 6;
+      resampler_ = std::make_unique<LinearResample>(
+          sampling_rate, opts_.frame_opts.samp_freq, lowpass_cutoff,
+          lowpass_filter_width);
+
+      std::vector<float> samples;
+      resampler_->Resample(waveform, n, false, &samples);
+      fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
+                             samples.size());
+      return;
+    }
+
     fbank_->AcceptWaveform(sampling_rate, waveform, n);
   }
 
@@ -95,6 +135,7 @@ class FeatureExtractor::Impl {
   std::unique_ptr<knf::OnlineFbank> fbank_;
   knf::FbankOptions opts_;
   mutable std::mutex mutex_;
+  std::unique_ptr<LinearResample> resampler_;
 };
 
 FeatureExtractor::FeatureExtractor(const FeatureExtractorConfig &config)
@@ -102,7 +143,7 @@ FeatureExtractor::FeatureExtractor(const FeatureExtractorConfig &config)
 
 FeatureExtractor::~FeatureExtractor() = default;
 
-void FeatureExtractor::AcceptWaveform(float sampling_rate,
+void FeatureExtractor::AcceptWaveform(int32_t sampling_rate,
                                       const float *waveform, int32_t n) {
   impl_->AcceptWaveform(sampling_rate, waveform, n);
 }

+ 7 - 4
sherpa-ncnn/csrc/features.h

@@ -29,7 +29,7 @@ class Mat;
 namespace sherpa_ncnn {
 
 struct FeatureExtractorConfig {
-  float sampling_rate = 16000;
+  int32_t sampling_rate = 16000;
   int32_t feature_dim = 80;
 
   // 100 frames per second
@@ -45,12 +45,15 @@ class FeatureExtractor {
   ~FeatureExtractor();
 
   /**
-     @param sampling_rate The sampling_rate of the input waveform. Should match
-                          the one expected by the feature extractor.
+     @param sampling_rate The sampling rate of the input waveform. The
+                          waveform is resampled if it differs from
+                          config.sampling_rate.
+                          Caution: You MUST NOT use a different sampling rate
+                          across different calls to AcceptWaveform().
      @param waveform Pointer to a 1-D array of size n
      @param n Number of entries in waveform
    */
-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n);
 
   // InputFinished() tells the class you won't be providing any
   // more waveform.  This will help flush out the last frame or two

+ 2 - 2
sherpa-ncnn/csrc/stream.cc

@@ -25,7 +25,7 @@ class Stream::Impl {
   explicit Impl(const FeatureExtractorConfig &config)
       : feat_extractor_(config) {}
 
-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n) {
+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
     feat_extractor_.AcceptWaveform(sampling_rate, waveform, n);
   }
 
@@ -71,7 +71,7 @@ Stream::Stream(const FeatureExtractorConfig &config)
 
 Stream::~Stream() = default;
 
-void Stream::AcceptWaveform(float sampling_rate, const float *waveform,
+void Stream::AcceptWaveform(int32_t sampling_rate, const float *waveform,
                             int32_t n) {
   impl_->AcceptWaveform(sampling_rate, waveform, n);
 }

+ 6 - 3
sherpa-ncnn/csrc/stream.h

@@ -32,12 +32,15 @@ class Stream {
   ~Stream();
 
   /**
-     @param sampling_rate The sampling_rate of the input waveform. Should match
-                          the one expected by the feature extractor.
+     @param sampling_rate The sampling rate of the input waveform. The
+                          waveform is resampled if it differs from
+                          config.sampling_rate.
+                          Caution: You MUST NOT use a different sampling rate
+                          across different calls to AcceptWaveform().
      @param waveform Pointer to a 1-D array of size n
      @param n Number of entries in waveform
    */
-  void AcceptWaveform(float sampling_rate, const float *waveform, int32_t n);
+  void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n);
 
   /**
    * InputFinished() tells the class you won't be providing any

+ 5 - 3
swift-api-examples/SherpaNcnn.swift

@@ -188,9 +188,11 @@ class SherpaNcnnRecognizer {
     ///
     /// - Parameters:
     ///   - samples: Audio samples normalized to the range [-1, 1]
-    ///   - sampleRate: Sample rate of the input audio samples. Must match
-    ///                 the one expected by the model. It must be 16000 for
-    ///                 models from icefall.
+    ///   - sampleRate: Sample rate of the input audio samples. If it
+    ///                 differs from featConfig.sampleRate, the samples
+    ///                 are resampled. Caution: You cannot use a different
+    ///                 sampleRate across different calls to
+    ///                 AcceptWaveform().
     func acceptWaveform(samples: [Float], sampleRate: Float = 16000) {
         AcceptWaveform(stream, sampleRate, samples, Int32(samples.count))
     }