Sfoglia il codice sorgente

Support int32_t samples for sherpa-ncnn-alsa (#234)

Fangjun Kuang 2 anni fa
parent
commit
655a99b9c2
3 file modificati con 125 aggiunte e 18 eliminazioni
  1. 9 0
      CMakeLists.txt
  2. 97 15
      sherpa-ncnn/csrc/alsa.cc
  3. 19 3
      sherpa-ncnn/csrc/alsa.h

+ 9 - 0
CMakeLists.txt

@@ -83,6 +83,15 @@ include(CheckIncludeFileCXX)
 check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
 if(SHERPA_NCNN_HAS_ALSA)
   add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
+else()
+  message(WARNING "\
+Could not find alsa/asoundlib.h !
+We won't build sherpa-ncnn-alsa
+To fix that, please do:
+  (1) sudo apt-get install alsa-utils libasound2-dev
+  (2) rm -rf build
+  (3) re-try
+")
 endif()
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)

+ 97 - 15
sherpa-ncnn/csrc/alsa.cc

@@ -21,18 +21,29 @@
 #include "sherpa-ncnn/csrc/alsa.h"
 
 #include <algorithm>
+#include <cstdlib>
 
 #include "alsa/asoundlib.h"
 
 namespace sherpa_ncnn {
 
-void ToFloat(const std::vector<int16_t> &in, int32_t num_channels,
-             std::vector<float> *out) {
+void ToFloat16(const std::vector<int16_t> &in, int32_t channel_to_use,
+               int32_t num_channels, std::vector<float> *out) {
   out->resize(in.size() / num_channels);
 
   int32_t n = in.size();
   for (int32_t i = 0, k = 0; i < n; i += num_channels, ++k) {
-    (*out)[k] = in[i] / 32768.;
+    (*out)[k] = in[i + channel_to_use] / 32768.0;
+  }
+}
+
+void ToFloat32(const std::vector<int32_t> &in, int32_t channel_to_use,
+               int32_t num_channels, std::vector<float> *out) {
+  out->resize(in.size() / num_channels);
+
+  int32_t n = in.size();
+  for (int32_t i = 0, k = 0; i < n; i += num_channels, ++k) {
+    (*out)[k] = in[i + channel_to_use] / float(1 << 31);
   }
 }
 
@@ -82,8 +93,21 @@ and if you want to select card 3 and the device 0 on that card, please use:
   err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
                                      SND_PCM_FORMAT_S16_LE);
   if (err) {
-    fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err));
-    exit(-1);
+    fprintf(stderr, "Failed to set format to SND_PCM_FORMAT_S16_LE: %s\n",
+            snd_strerror(err));
+
+    // now try to use SND_PCM_FORMAT_S32_LE
+    fprintf(stderr, "Trying to set format to SND_PCM_FORMAT_S32_LE\n");
+
+    err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
+                                       SND_PCM_FORMAT_S32_LE);
+    if (err) {
+      fprintf(stderr, "Failed to set format to SND_PCM_FORMAT_S32_LE: %s\n",
+              snd_strerror(err));
+      exit(-1);
+    }
+    fprintf(stderr, "Set format to SND_PCM_FORMAT_S32_LE successfully\n");
+    pcm_format_ = 32;
   }
 
   std::vector<int32_t> possible_channels = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
@@ -105,12 +129,29 @@ and if you want to select card 3 and the device 0 on that card, please use:
   }
 
   if (actual_channel_count_ > 1) {
-    fprintf(stderr, "We use only the first channel out of %d channels\n",
-            actual_channel_count_);
+    const char *p = std::getenv("SHERPA_NCNN_ALSA_USE_CHANNEL");
+    if (p != nullptr) {
+      int32_t channel_to_use = atoi(p);
+      if (channel_to_use < 0 || channel_to_use >= actual_channel_count_) {
+        fprintf(stderr, "Invalid SHERPA_NCNN_ALSA_USE_CHANNEL: %s\n", p);
+        exit(-1);
+      }
+
+      channel_to_use_ = channel_to_use;
+    }
+
+    fprintf(stderr, "We use only channel %d out of %d channels\n",
+            channel_to_use_, actual_channel_count_);
 
     fprintf(stderr,
-            "Please use arecord and audacity to check that channel 0 indeed "
-            "contains audio samples\n");
+            "Please use arecord and audacity to check that channel %d indeed "
+            "contains audio samples\n",
+            channel_to_use_);
+    fprintf(stderr,
+            "Hint: You can use\n"
+            "  export SHERPA_NCNN_ALSA_USE_CHANNEL=1\n"
+            "to use channel 1 out of %d channels\n",
+            actual_channel_count_);
   }
 
   uint32_t actual_sample_rate = expected_sample_rate_;
@@ -162,11 +203,40 @@ and if you want to select card 3 and the device 0 on that card, please use:
 
 Alsa::~Alsa() { snd_pcm_close(capture_handle_); }
 
-const std::vector<float> &Alsa::Read(int32_t num_samples) {
-  samples_.resize(num_samples * actual_channel_count_);
+const std::vector<float> &Alsa::Read16(int32_t num_samples) {
+  samples16_.resize(num_samples * actual_channel_count_);
+
+  // count is in frames. Each frame contains actual_channel_count_ samples
+  int32_t count =
+      snd_pcm_readi(capture_handle_, samples16_.data(), num_samples);
+  if (count == -EPIPE) {
+    fprintf(
+        stderr,
+        "An overrun occurred, which means the RTF of the current "
+        "model on your board is larger than 1. You can use ./bin/sherpa-ncnn "
+        "to verify that. Please select a smaller model whose RTF is less than "
+        "1 for your board.");
+    exit(-1);
+  }
+
+  samples16_.resize(count * actual_channel_count_);
+
+  ToFloat16(samples16_, channel_to_use_, actual_channel_count_, &samples1_);
+
+  if (!resampler_) {
+    return samples1_;
+  }
+
+  resampler_->Resample(samples1_.data(), samples16_.size(), false, &samples2_);
+  return samples2_;
+}
+
+const std::vector<float> &Alsa::Read32(int32_t num_samples) {
+  samples32_.resize(num_samples * actual_channel_count_);
 
   // count is in frames. Each frame contains actual_channel_count_ samples
-  int32_t count = snd_pcm_readi(capture_handle_, samples_.data(), num_samples);
+  int32_t count =
+      snd_pcm_readi(capture_handle_, samples32_.data(), num_samples);
   if (count == -EPIPE) {
     fprintf(
         stderr,
@@ -177,18 +247,30 @@ const std::vector<float> &Alsa::Read(int32_t num_samples) {
     exit(-1);
   }
 
-  samples_.resize(count * actual_channel_count_);
+  samples32_.resize(count * actual_channel_count_);
 
-  ToFloat(samples_, actual_channel_count_, &samples1_);
+  ToFloat32(samples32_, channel_to_use_, actual_channel_count_, &samples1_);
 
   if (!resampler_) {
     return samples1_;
   }
 
-  resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_);
+  resampler_->Resample(samples1_.data(), samples32_.size(), false, &samples2_);
   return samples2_;
 }
 
+const std::vector<float> &Alsa::Read(int32_t num_samples) {
+  switch (pcm_format_) {
+    case 16:
+      return Read16(num_samples);
+    case 32:
+      return Read32(num_samples);
+    default:
+      fprintf(stderr, "Unsupported pcm format: %d\n", pcm_format_);
+      exit(-1);
+  }
+}
+
 }  // namespace sherpa_ncnn
 
 #endif

+ 19 - 3
sherpa-ncnn/csrc/alsa.h

@@ -42,6 +42,10 @@ class Alsa {
   int32_t GetExpectedSampleRate() const { return expected_sample_rate_; }
   int32_t GetActualSampleRate() const { return actual_sample_rate_; }
 
+ private:
+  const std::vector<float> &Read16(int32_t num_samples);
+  const std::vector<float> &Read32(int32_t num_samples);
+
  private:
   snd_pcm_t *capture_handle_;
   int32_t expected_sample_rate_ = 16000;
@@ -49,10 +53,22 @@ class Alsa {
 
   int32_t actual_channel_count_ = 1;
 
+  // If there are multiple channels, we use this channel for recognition
+  int32_t channel_to_use_ = 0;
+
   std::unique_ptr<LinearResample> resampler_;
-  std::vector<int16_t> samples_;  // directly from the microphone
-  std::vector<float> samples1_;   // normalized version of samples_
-  std::vector<float> samples2_;   // possibly resampled from samples1_
+
+  // If it is 16, we use samples16_
+  // If it is 32, we use samples32_
+  //
+  // It can only be 16 or 32.
+  int32_t pcm_format_ = 16;
+
+  std::vector<int16_t> samples16_;  // directly from the microphone
+  std::vector<int32_t> samples32_;  // directly from the microphone
+
+  std::vector<float> samples1_;  // normalized version of samples16_ or samples32_
+  std::vector<float> samples2_;  // possibly resampled from samples1_
 };
 
 }  // namespace sherpa_ncnn