2 anni fa · 655a99b9c2
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,6 +83,15 @@ include(CheckIncludeFileCXX)
 
				 check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
			
 
				 if(SHERPA_NCNN_HAS_ALSA)
			
 
				   add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
			
 
				+else()
			
 
				+  message(WARNING "\
			
 
				+Could not find alsa/asoundlib.h !
			
 
				+We won't build sherpa-ncnn-alsa
			
 
				+To fix that, please do:
			
 
				+  (1) sudo apt-get install alsa-utils libasound2-dev
			
 
				+  (2) rm -rf build
			
 
				+  (3) re-try
			
 
				+")
			
 
				 endif()
			
 
				 
			
 
				 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
			
--- a/sherpa-ncnn/csrc/alsa.cc
+++ b/sherpa-ncnn/csrc/alsa.cc
@@ -21,18 +21,29 @@
 
				 #include "sherpa-ncnn/csrc/alsa.h"
			
 
				 
			
 
				 #include <algorithm>
			
 
				+#include <cstdlib>
			
 
				 
			
 
				 #include "alsa/asoundlib.h"
			
 
				 
			
 
				 namespace sherpa_ncnn {
			
 
				 
			
 
				-void ToFloat(const std::vector<int16_t> &in, int32_t num_channels,
			
 
				-             std::vector<float> *out) {
			
 
				// Extracts one channel from interleaved 16-bit PCM and scales it to float.
				//
				// @param in              Interleaved samples. Frame f occupies indices
				//                        [f * num_channels, (f + 1) * num_channels).
				// @param channel_to_use  Index of the channel to keep. The caller must
				//                        guarantee 0 <= channel_to_use < num_channels.
				// @param num_channels    Number of interleaved channels. Must be positive.
				// @param out             Resized to the number of complete frames in `in`;
				//                        each element is the selected sample divided by
				//                        32768.
				void ToFloat16(const std::vector<int16_t> &in, int32_t channel_to_use,
				               int32_t num_channels, std::vector<float> *out) {
				  // Convert complete frames only. A trailing partial frame is ignored so
				  // that neither `in` nor `*out` is ever indexed out of range.
				  const int32_t num_frames = static_cast<int32_t>(in.size() / num_channels);
				  out->resize(num_frames);

				  for (int32_t k = 0; k < num_frames; ++k) {
				    (*out)[k] = in[k * num_channels + channel_to_use] / 32768.0;
				  }
				}
			
 
				+
			
 
				// Extracts one channel from interleaved 32-bit PCM and scales it to float.
				//
				// @param in              Interleaved samples. Frame f occupies indices
				//                        [f * num_channels, (f + 1) * num_channels).
				// @param channel_to_use  Index of the channel to keep. The caller must
				//                        guarantee 0 <= channel_to_use < num_channels.
				// @param num_channels    Number of interleaved channels. Must be positive.
				// @param out             Resized to the number of complete frames in `in`;
				//                        each element is the selected sample divided by
				//                        2^31.
				void ToFloat32(const std::vector<int32_t> &in, int32_t channel_to_use,
				               int32_t num_channels, std::vector<float> *out) {
				  // 2^31 written as a float literal. The integer expression `1 << 31`
				  // overflows a 32-bit signed int and typically evaluates to INT_MIN, which
				  // would flip the sign of every converted sample.
				  constexpr float kScale = 2147483648.0f;

				  // Convert complete frames only. A trailing partial frame is ignored so
				  // that neither `in` nor `*out` is ever indexed out of range.
				  const int32_t num_frames = static_cast<int32_t>(in.size() / num_channels);
				  out->resize(num_frames);

				  for (int32_t k = 0; k < num_frames; ++k) {
				    (*out)[k] = in[k * num_channels + channel_to_use] / kScale;
				  }
				}
			
 
				 
			
@@ -82,8 +93,21 @@ and if you want to select card 3 and the device 0 on that card, please use:
 
				   err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
			
 
				                                      SND_PCM_FORMAT_S16_LE);
			
 
				   if (err) {
			
 
				-    fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err));
			
 
				-    exit(-1);
			
 
				+    fprintf(stderr, "Failed to set format to SND_PCM_FORMAT_S16_LE: %s\n",
			
 
				+            snd_strerror(err));
			
 
				+
			
 
				+    // now try to use SND_PCM_FORMAT_S32_LE
			
 
				+    fprintf(stderr, "Trying to set format to SND_PCM_FORMAT_S32_LE\n");
			
 
				+
			
 
				+    err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
			
 
				+                                       SND_PCM_FORMAT_S32_LE);
			
 
				+    if (err) {
			
 
				+      fprintf(stderr, "Failed to set format to SND_PCM_FORMAT_S32_LE: %s\n",
			
 
				+              snd_strerror(err));
			
 
				+      exit(-1);
			
 
				+    }
			
 
				+    fprintf(stderr, "Set format to SND_PCM_FORMAT_S32_LE successfully\n");
			
 
				+    pcm_format_ = 32;
			
 
				   }
			
 
				 
			
 
				   std::vector<int32_t> possible_channels = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
			
@@ -105,12 +129,29 @@ and if you want to select card 3 and the device 0 on that card, please use:
 
				   }
			
 
				 
			
 
				   if (actual_channel_count_ > 1) {
			
 
				-    fprintf(stderr, "We use only the first channel out of %d channels\n",
			
 
				-            actual_channel_count_);
			
 
				+    const char *p = std::getenv("SHERPA_NCNN_ALSA_USE_CHANNEL");
			
 
				+    if (p != nullptr) {
			
 
				+      int32_t channel_to_use = atoi(p);
			
 
				+      if (channel_to_use < 0 || channel_to_use >= actual_channel_count_) {
			
 
				+        fprintf(stderr, "Invalid SHERPA_NCNN_ALSA_USE_CHANNEL: %s\n", p);
			
 
				+        exit(-1);
			
 
				+      }
			
 
				+
			
 
				+      channel_to_use_ = channel_to_use;
			
 
				+    }
			
 
				+
			
 
				+    fprintf(stderr, "We use only channel %d out of %d channels\n",
			
 
				+            channel_to_use_, actual_channel_count_);
			
 
				 
			
 
				     fprintf(stderr,
			
 
				-            "Please use arecord and audacity to check that channel 0 indeed "
			
 
				-            "contains audio samples\n");
			
 
				+            "Please use arecord and audacity to check that channel %d indeed "
			
 
				+            "contains audio samples\n",
			
 
				+            channel_to_use_);
			
 
				+    fprintf(stderr,
			
 
				+            "Hint: You can use\n"
			
 
				+            "  export SHERPA_NCNN_ALSA_USE_CHANNEL=1\n"
			
 
				+            "to use channel 1 out of %d channels\n",
			
 
				+            actual_channel_count_);
			
 
				   }
			
 
				 
			
 
				   uint32_t actual_sample_rate = expected_sample_rate_;
			
@@ -162,11 +203,40 @@ and if you want to select card 3 and the device 0 on that card, please use:
 
				 
			
 
				 Alsa::~Alsa() { snd_pcm_close(capture_handle_); }
			
 
				 
			
 
				-const std::vector<float> &Alsa::Read(int32_t num_samples) {
			
 
				-  samples_.resize(num_samples * actual_channel_count_);
			
 
				+const std::vector<float> &Alsa::Read16(int32_t num_samples) {
			
 
				+  samples16_.resize(num_samples * actual_channel_count_);
			
 
				+
			
 
				+  // count is in frames. Each frame contains actual_channel_count_ samples
			
 
				+  int32_t count =
			
 
				+      snd_pcm_readi(capture_handle_, samples16_.data(), num_samples);
			
 
				+  if (count == -EPIPE) {
			
 
				+    fprintf(
			
 
				+        stderr,
			
 
				+        "An overrun occurred, which means the RTF of the current "
			
 
				+        "model on your board is larger than 1. You can use ./bin/sherpa-ncnn "
			
 
				+        "to verify that. Please select a smaller model whose RTF is less than "
			
 
				+        "1 for your board.");
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  samples16_.resize(count * actual_channel_count_);
			
 
				+
			
 
				+  ToFloat16(samples16_, channel_to_use_, actual_channel_count_, &samples1_);
			
 
				+
			
 
				+  if (!resampler_) {
			
 
				+    return samples1_;
			
 
				+  }
			
 
				+
			
 
				+  resampler_->Resample(samples1_.data(), samples16_.size(), false, &samples2_);
			
 
				+  return samples2_;
			
 
				+}
			
 
				+
			
 
				+const std::vector<float> &Alsa::Read32(int32_t num_samples) {
			
 
				+  samples32_.resize(num_samples * actual_channel_count_);
			
 
				 
			
 
				   // count is in frames. Each frame contains actual_channel_count_ samples
			
 
				-  int32_t count = snd_pcm_readi(capture_handle_, samples_.data(), num_samples);
			
 
				+  int32_t count =
			
 
				+      snd_pcm_readi(capture_handle_, samples32_.data(), num_samples);
			
 
				   if (count == -EPIPE) {
			
 
				     fprintf(
			
 
				         stderr,
			
@@ -177,18 +247,30 @@ const std::vector<float> &Alsa::Read(int32_t num_samples) {
 
				     exit(-1);
			
 
				   }
			
 
				 
			
 
				-  samples_.resize(count * actual_channel_count_);
			
 
				+  samples32_.resize(count * actual_channel_count_);
			
 
				 
			
 
				-  ToFloat(samples_, actual_channel_count_, &samples1_);
			
 
				+  ToFloat32(samples32_, channel_to_use_, actual_channel_count_, &samples1_);
			
 
				 
			
 
				   if (!resampler_) {
			
 
				     return samples1_;
			
 
				   }
			
 
				 
			
 
				-  resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_);
			
 
				+  resampler_->Resample(samples1_.data(), samples32_.size(), false, &samples2_);
			
 
				   return samples2_;
			
 
				 }
			
 
				 
			
 
				+const std::vector<float> &Alsa::Read(int32_t num_samples) {
			
 
				+  switch (pcm_format_) {
			
 
				+    case 16:
			
 
				+      return Read16(num_samples);
			
 
				+    case 32:
			
 
				+      return Read32(num_samples);
			
 
				+    default:
			
 
				+      fprintf(stderr, "Unsupported pcm format: %d\n", pcm_format_);
			
 
				+      exit(-1);
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 }  // namespace sherpa_ncnn
			
 
				 
			
 
				 #endif
			
--- a/sherpa-ncnn/csrc/alsa.h
+++ b/sherpa-ncnn/csrc/alsa.h
@@ -42,6 +42,10 @@ class Alsa {
 
				   int32_t GetExpectedSampleRate() const { return expected_sample_rate_; }
			
 
				   int32_t GetActualSampleRate() const { return actual_sample_rate_; }
			
 
				 
			
 
				+ private:
			
 
				+  const std::vector<float> &Read16(int32_t num_samples);
			
 
				+  const std::vector<float> &Read32(int32_t num_samples);
			
 
				+
			
 
				  private:
			
 
				   snd_pcm_t *capture_handle_;
			
 
				   int32_t expected_sample_rate_ = 16000;
			
@@ -49,10 +53,22 @@ class Alsa {
 
				 
			
 
				   int32_t actual_channel_count_ = 1;
			
 
				 
			
 
				+  // If there are multiple channels, we use this channel for recognition
			
 
				+  int32_t channel_to_use_ = 0;
			
 
				+
			
 
				   std::unique_ptr<LinearResample> resampler_;
			
 
				-  std::vector<int16_t> samples_;  // directly from the microphone
			
 
				-  std::vector<float> samples1_;   // normalized version of samples_
			
 
				-  std::vector<float> samples2_;   // possibly resampled from samples1_
			
 
				+
			
 
				+  // If it is 16, we use samples16_
			
 
				+  // If it is 32, we use samples32_
			
 
				+  //
			
 
				+  // It can only be 16 or 32.
			
 
				+  int32_t pcm_format_ = 16;
			
 
				+
			
 
				+  std::vector<int16_t> samples16_;  // directly from the microphone
			
 
				+  std::vector<int32_t> samples32_;  // directly from the microphone
			
 
				+
			
 
				+  std::vector<float> samples1_;  // normalized version of samples16_ or samples32_
			
 
				+  std::vector<float> samples2_;  // possibly resampled from samples1_
			
 
				 };
			
 
				 
			
 
				 }  // namespace sherpa_ncnn