2 年之前 · 5f5385c018
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -40,6 +40,7 @@ message(STATUS "SHERPA_NCNN_ENABLE_JNI ${SHERPA_NCNN_ENABLE_JNI}")
 
				 message(STATUS "SHERPA_NCNN_ENABLE_BINARY ${SHERPA_NCNN_ENABLE_BINARY}")
			
 
				 message(STATUS "SHERPA_NCNN_ENABLE_TEST ${SHERPA_NCNN_ENABLE_TEST}")
			
 
				 
			
 
				+
			
 
				 if(NOT CMAKE_BUILD_TYPE)
			
 
				   message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
			
 
				   set(CMAKE_BUILD_TYPE Release)
			
@@ -49,6 +50,12 @@ message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
 
				 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
			
 
				 set(CMAKE_CXX_EXTENSIONS OFF)
			
 
				 
			
 
				+include(CheckIncludeFileCXX)  # provides check_include_file_cxx()
			
 
				+check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)  # result is tested below and in sherpa-ncnn/csrc/CMakeLists.txt
			
 
				+if(SHERPA_NCNN_HAS_ALSA)  # true when the ALSA header compiles with the C++ compiler
			
 
				+  add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)  # alsa.cc is wrapped in #ifdef SHERPA_NCNN_ENABLE_ALSA
			
 
				+endif()
			
 
				+
			
 
				 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
			
 
				 
			
 
				 include(kaldi-native-fbank)
			
--- a/build-aarch64-linux-gnu.sh
+++ b/build-aarch64-linux-gnu.sh
@@ -13,11 +13,28 @@ set -x
 
				 dir=build-aarch64-linux-gnu
			
 
				 mkdir -p $dir
			
 
				 cd $dir
			
 
				+
			
 
				+if [ ! -f alsa-lib/src/.libs/libasound.so ]; then
			
 
				+  echo "Start to cross-compile alsa-lib"
			
 
				+  if [ ! -d alsa-lib ]; then
			
 
				+    git clone --depth 1 https://github.com/alsa-project/alsa-lib
			
 
				+  fi
			
 
				+  pushd alsa-lib
			
 
				+  CC=aarch64-linux-gnu-gcc ./gitcompile --host=aarch64-linux-gnu
			
 
				+  popd
			
 
				+  echo "Finish cross-compiling alsa-lib"
			
 
				+fi
			
 
				+
			
 
				+export CPLUS_INCLUDE_PATH=$PWD/alsa-lib/include:$CPLUS_INCLUDE_PATH
			
 
				+export SHERPA_NCNN_ALSA_LIB_DIR=$PWD/alsa-lib/src/.libs
			
 
				+
			
 
				 cmake \
			
 
				   -DCMAKE_INSTALL_PREFIX=./install \
			
 
				   -DCMAKE_BUILD_TYPE=Release \
			
 
				   -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
			
 
				   ..
			
 
				+# ./install/lib is only created by `make install` further down, so make
				+# sure it exists before copying the cross-compiled ALSA shared libraries
				+# into it. The variable is quoted so that a build directory whose path
				+# contains spaces still works.
				+mkdir -p ./install/lib
				+cp -v "$SHERPA_NCNN_ALSA_LIB_DIR"/libasound.so* ./install/lib/
			
 
				 
			
 
				-make VERBOSE=1 -j4
			
 
				+make VERBOSE=1 -j10
			
 
				 make install/strip
			
 
				+
			
--- a/sherpa-ncnn/csrc/CMakeLists.txt
+++ b/sherpa-ncnn/csrc/CMakeLists.txt
@@ -25,6 +25,18 @@ if(NOT SHERPA_NCNN_ENABLE_PYTHON)
 
				     target_link_libraries(sherpa-ncnn PRIVATE sherpa-ncnn-core)
			
 
				     install(TARGETS sherpa-ncnn DESTINATION bin)
			
 
				 
			
 
				+    if(SHERPA_NCNN_HAS_ALSA)  # set by check_include_file_cxx() in the top-level CMakeLists.txt
			
 
				+      add_executable(sherpa-ncnn-alsa sherpa-ncnn-alsa.cc alsa.cc)  # microphone demo that captures audio through ALSA
			
 
				+      target_link_libraries(sherpa-ncnn-alsa PRIVATE sherpa-ncnn-core)
			
 
				+
			
 
				+      if(DEFINED ENV{SHERPA_NCNN_ALSA_LIB_DIR})  # exported by build-aarch64-linux-gnu.sh; evaluated at configure time only
			
 
				+        target_link_libraries(sherpa-ncnn-alsa PRIVATE -L$ENV{SHERPA_NCNN_ALSA_LIB_DIR} -lasound)  # link libasound from that directory
			
 
				+      else()
			
 
				+        target_link_libraries(sherpa-ncnn-alsa PRIVATE asound)  # no override: the linker searches its default paths
			
 
				+      endif()
			
 
				+      install(TARGETS sherpa-ncnn-alsa DESTINATION bin)
			
 
				+    endif()
			
 
				+
			
 
				     if(SHERPA_NCNN_ENABLE_PORTAUDIO)
			
 
				       add_executable(sherpa-ncnn-microphone
			
 
				         sherpa-ncnn-microphone.cc
			
@@ -63,3 +75,4 @@ if(SHERPA_NCNN_ENABLE_TEST)
 
				   add_executable(test-resample test-resample.cc)
			
 
				   target_link_libraries(test-resample sherpa-ncnn-core)
			
 
				 endif()
			
 
				+
			
--- a/sherpa-ncnn/csrc/alsa.cc
+++ b/sherpa-ncnn/csrc/alsa.cc
@@ -0,0 +1,162 @@
 
				+/**
			
 
				+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
			
 
				+ *
			
 
				+ * See LICENSE for clarification regarding multiple authors
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+#ifdef SHERPA_NCNN_ENABLE_ALSA
			
 
				+
			
 
				+#include "sherpa-ncnn/csrc/alsa.h"
			
 
				+
			
 
				+#include "alsa/asoundlib.h"
			
 
				+
			
 
				+namespace sherpa_ncnn {
			
 
				+
			
 
				+// Converts 16-bit PCM samples to floating point by dividing each sample
				+// by 32768.
				+//
				+// @param in   Samples to convert.
				+// @param out  Resized to in.size() and overwritten with the scaled samples.
				+void ToFloat(const std::vector<int16_t> &in, std::vector<float> *out) {
				+  const int32_t num_samples = in.size();
				+  out->resize(in.size());
				+
				+  const int16_t *src = in.data();
				+  float *dst = out->data();
				+  for (int32_t k = 0; k != num_samples; ++k) {
				+    dst[k] = src[k] / 32768.;
				+  }
				+}
			
 
				+
			
 
				+Alsa::Alsa(const char *device_name) {
			
 
				+  const char *kDeviceHelp = R"(
			
 
				+Please use the command:
			
 
				+
			
 
				+  arecord -l
			
 
				+
			
 
				+to list all available devices. For instance, if the output is:
			
 
				+
			
 
				+**** List of CAPTURE Hardware Devices ****
			
 
				+card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
			
 
				+  Subdevices: 1/1
			
 
				+  Subdevice #0: subdevice #0
			
 
				+
			
 
				+and if you want to select card 3 and the device 0 on that card, please use:
			
 
				+
			
 
				+  hw:3,0
			
 
				+
			
 
				+  )";
			
 
				+
			
 
				+  int32_t err =
			
 
				+      snd_pcm_open(&capture_handle_, device_name, SND_PCM_STREAM_CAPTURE, 0);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(err));
			
 
				+    fprintf(stderr, "%s\n", kDeviceHelp);
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  snd_pcm_hw_params_t *hw_params;
			
 
				+  snd_pcm_hw_params_alloca(&hw_params);
			
 
				+
			
 
				+  err = snd_pcm_hw_params_any(capture_handle_, hw_params);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to initialize hw_params: %s\n", snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  err = snd_pcm_hw_params_set_access(capture_handle_, hw_params,
			
 
				+                                     SND_PCM_ACCESS_RW_INTERLEAVED);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to set access type: %s\n", snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
			
 
				+                                     SND_PCM_FORMAT_S16_LE);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  // mono
			
 
				+  err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 1);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to set number of channels to 1. %s\n",
			
 
				+            snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  uint32_t actual_sample_rate = expected_sample_rate_;
			
 
				+
			
 
				+  int32_t dir = 0;
			
 
				+  err = snd_pcm_hw_params_set_rate_near(capture_handle_, hw_params,
			
 
				+                                        &actual_sample_rate, &dir);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to set sample rate to, %d: %s\n",
			
 
				+            expected_sample_rate_, snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+  actual_sample_rate_ = actual_sample_rate;
			
 
				+
			
 
				+  if (actual_sample_rate_ != expected_sample_rate_) {
			
 
				+    fprintf(stderr, "Failed to set sample rate to %d\n", expected_sample_rate_);
			
 
				+    fprintf(stderr, "Current sample rate to %d\n", actual_sample_rate_);
			
 
				+    fprintf(stderr,
			
 
				+            "Creating a resampler:\n"
			
 
				+            "   in_sample_rate: %d\n"
			
 
				+            "   output_sample_rate: %d\n",
			
 
				+            actual_sample_rate_, expected_sample_rate_);
			
 
				+
			
 
				+    float min_freq = std::min(actual_sample_rate_, expected_sample_rate_);
			
 
				+    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
			
 
				+
			
 
				+    int32_t lowpass_filter_width = 6;
			
 
				+    resampler_ = std::make_unique<LinearResample>(
			
 
				+        actual_sample_rate_, expected_sample_rate_, lowpass_cutoff,
			
 
				+        lowpass_filter_width);
			
 
				+  } else {
			
 
				+    fprintf(stderr, "Current sample rate: %d\n", actual_sample_rate_);
			
 
				+  }
			
 
				+
			
 
				+  err = snd_pcm_hw_params(capture_handle_, hw_params);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to set hw params: %s\n", snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  err = snd_pcm_prepare(capture_handle_);
			
 
				+  if (err) {
			
 
				+    fprintf(stderr, "Failed to prepare for recording: %s\n", snd_strerror(err));
			
 
				+    exit(-1);
			
 
				+  }
			
 
				+
			
 
				+  fprintf(stderr, "Recording started!\n");
			
 
				+}
			
 
				+
			
 
				+Alsa::~Alsa() { snd_pcm_close(capture_handle_); }
			
 
				+
			
 
				+// Blocking read of up to num_samples frames from the capture device.
				+//
				+// The captured 16-bit samples are normalized with ToFloat() and, when the
				+// constructor created a resampler, resampled before being returned.
				+//
				+// @param num_samples  Number of samples to request from the device.
				+// @return A reference to an internal buffer; it is overwritten by the next
				+//         call to Read(). The buffer is empty when the read failed but the
				+//         stream could be recovered.
				+const std::vector<float> &Alsa::Read(int32_t num_samples) {
				+  samples_.resize(num_samples);
				+
				+  // snd_pcm_readi() returns the number of frames read, or a negative
				+  // error code (for example after an overrun or an interrupted call).
				+  snd_pcm_sframes_t count =
				+      snd_pcm_readi(capture_handle_, samples_.data(), samples_.size());
				+
				+  if (count < 0) {
				+    // A negative count must never reach resize(): converted to size_t it
				+    // would request an enormous allocation. Try to recover the stream.
				+    int32_t err =
				+        snd_pcm_recover(capture_handle_, static_cast<int>(count), 1);
				+    if (err < 0) {
				+      fprintf(stderr, "Failed to read from the capture device: %s\n",
				+              snd_strerror(err));
				+      exit(-1);
				+    }
				+
				+    // The stream is usable again, but this call produced no audio.
				+    samples_.clear();
				+    samples1_.clear();
				+    return samples1_;
				+  }
				+
				+  // Fewer frames than requested may have been delivered.
				+  samples_.resize(count);
				+
				+  ToFloat(samples_, &samples1_);
				+
				+  if (!resampler_) {
				+    return samples1_;
				+  }
				+
				+  resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_);
				+  return samples2_;
				+}
			
 
				+
			
 
				+}  // namespace sherpa_ncnn
			
 
				+
			
 
				+#endif
			
--- a/sherpa-ncnn/csrc/alsa.h
+++ b/sherpa-ncnn/csrc/alsa.h
@@ -0,0 +1,58 @@
 
				+/**
			
 
				+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
			
 
				+ *
			
 
				+ * See LICENSE for clarification regarding multiple authors
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+// The guard is named after this file (alsa.h) so that it cannot clash with
				+// the guard of another header.
				+#ifndef SHERPA_NCNN_CSRC_ALSA_H_
				+#define SHERPA_NCNN_CSRC_ALSA_H_
				+
				+#include <cstdint>
				+#include <memory>
				+#include <vector>
				+
				+#include "alsa/asoundlib.h"
				+#include "sherpa-ncnn/csrc/resample.h"
				+
				+namespace sherpa_ncnn {
				+
				+// Captures mono 16-bit audio from an ALSA device and returns it as
				+// normalized float samples at the expected sample rate.
				+class Alsa {
				+ public:
				+  // Opens device_name (e.g. "hw:3,0") for capture and configures it.
				+  // The process exits if the device cannot be opened or configured.
				+  explicit Alsa(const char *device_name);
				+
				+  // Closes the capture device.
				+  ~Alsa();
				+
				+  // The object owns the PCM handle, so it must not be copied. (It was
				+  // already non-copyable because of the std::unique_ptr member; this
				+  // only makes the intent explicit.)
				+  Alsa(const Alsa &) = delete;
				+  Alsa &operator=(const Alsa &) = delete;
				+
				+  // This is a blocking read.
				+  //
				+  // @param num_samples  Number of samples to read.
				+  //
				+  // The returned value is valid until the next call to Read().
				+  const std::vector<float> &Read(int32_t num_samples);
				+
				+  // Sample rate of the samples returned by Read() after resampling.
				+  int32_t GetExpectedSampleRate() const { return expected_sample_rate_; }
				+
				+  // Sample rate the capture device was actually configured with.
				+  int32_t GetActualSampleRate() const { return actual_sample_rate_; }
				+
				+ private:
				+  snd_pcm_t *capture_handle_ = nullptr;
				+  int32_t expected_sample_rate_ = 16000;
				+  int32_t actual_sample_rate_ = 0;  // set by the constructor
				+
				+  // Only created when the actual and expected sample rates differ.
				+  std::unique_ptr<LinearResample> resampler_;
				+  std::vector<int16_t> samples_;  // directly from the microphone
				+  std::vector<float> samples1_;   // normalized version of samples_
				+  std::vector<float> samples2_;   // possibly resampled from samples1_
				+};
				+
				+}  // namespace sherpa_ncnn
				+
				+#endif  // SHERPA_NCNN_CSRC_ALSA_H_
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
@@ -0,0 +1,169 @@
 
				+/**
			
 
				+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
			
 
				+ *
			
 
				+ * See LICENSE for clarification regarding multiple authors
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+#include <signal.h>
			
 
				+#include <stdio.h>
			
 
				+#include <stdlib.h>
			
 
				+
			
 
				+#include <algorithm>
			
 
				+#include <cstdint>
			
 
				+
			
 
				+#include "sherpa-ncnn/csrc/alsa.h"
			
 
				+#include "sherpa-ncnn/csrc/recognizer.h"
			
 
				+
			
 
				+// Set to 1 by the SIGINT handler and polled by the recognition loop in
				+// main(). volatile sig_atomic_t is the type that may safely be written
				+// from a signal handler.
				+volatile sig_atomic_t stop = 0;
				+
				+// SIGINT handler. It only records the request: stdio functions such as
				+// fprintf() are not async-signal-safe, so main() prints the message once
				+// the loop has ended.
				+static void Handler(int /*sig*/) { stop = 1; }
				+
				+// Streaming speech recognition from an ALSA capture device.
				+//
				+// Positional arguments: tokens, the encoder/decoder/joiner param and bin
				+// files, the ALSA device name, then optionally num_threads (argv[9]) and
				+// the decoding method (argv[10]). Runs until Ctrl + C is pressed.
				+int main(int argc, char *argv[]) {
				+  if (argc < 9 || argc > 11) {
				+    const char *usage = R"usage(
				+Usage:
				+  ./bin/sherpa-ncnn-alsa \
				+    /path/to/tokens.txt \
				+    /path/to/encoder.ncnn.param \
				+    /path/to/encoder.ncnn.bin \
				+    /path/to/decoder.ncnn.param \
				+    /path/to/decoder.ncnn.bin \
				+    /path/to/joiner.ncnn.param \
				+    /path/to/joiner.ncnn.bin \
				+    device_name \
				+    [num_threads] [decode_method, can be greedy_search/modified_beam_search]
				+
				+Please refer to
				+https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
				+for a list of pre-trained models to download.
				+
				+The device name specifies which microphone to use in case there are several
				+on you system. You can use
				+
				+  arecord -l
				+
				+to find all available microphones on your computer. For instance, if it outputs
				+
				+**** List of CAPTURE Hardware Devices ****
				+card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
				+  Subdevices: 1/1
				+  Subdevice #0: subdevice #0
				+
				+and if you want to select card 3 and the device 0 on that card, please use:
				+
				+  hw:3,0
				+
				+as the device_name.
				+)usage";
				+
				+    fprintf(stderr, "%s\n", usage);
				+    fprintf(stderr, "argc, %d\n", argc);
				+
				+    return 0;
				+  }
				+
				+  signal(SIGINT, Handler);
				+
				+  sherpa_ncnn::ModelConfig model_conf;
				+  model_conf.tokens = argv[1];
				+  model_conf.encoder_param = argv[2];
				+  model_conf.encoder_bin = argv[3];
				+  model_conf.decoder_param = argv[4];
				+  model_conf.decoder_bin = argv[5];
				+  model_conf.joiner_param = argv[6];
				+  model_conf.joiner_bin = argv[7];
				+
				+  const char *device_name = argv[8];
				+
				+  // Optional argv[9]: number of threads. Non-positive or non-numeric
				+  // values keep the default.
				+  int num_threads = 4;
				+  if (argc >= 10 && atoi(argv[9]) > 0) {
				+    num_threads = atoi(argv[9]);
				+  }
				+
				+  model_conf.encoder_opt.num_threads = num_threads;
				+  model_conf.decoder_opt.num_threads = num_threads;
				+  model_conf.joiner_opt.num_threads = num_threads;
				+
				+  fprintf(stderr, "%s\n", model_conf.ToString().c_str());
				+
				+  sherpa_ncnn::DecoderConfig decoder_conf;
				+  // Optional argv[10]: decoding method. argv[9] is num_threads, so the
				+  // method is only present when all eleven arguments are given.
				+  if (argc == 11) {
				+    std::string method = argv[10];
				+    // std::string::compare() returns 0 on equality, so test with ==.
				+    if (method == "greedy_search" || method == "modified_beam_search") {
				+      decoder_conf.method = method;
				+    } else {
				+      fprintf(stderr,
				+              "Unsupported decoding method: %s. Using the default one.\n",
				+              method.c_str());
				+    }
				+  }
				+
				+  decoder_conf.enable_endpoint = true;
				+
				+  sherpa_ncnn::EndpointConfig endpoint_config;
				+  endpoint_config.rule1.min_trailing_silence = 2.4;
				+  endpoint_config.rule2.min_trailing_silence = 1.2;
				+  endpoint_config.rule3.min_utterance_length = 300;
				+
				+  decoder_conf.endpoint_config = endpoint_config;
				+
				+  fprintf(stderr, "%s\n", decoder_conf.ToString().c_str());
				+
				+  int32_t expected_sampling_rate = 16000;
				+  knf::FbankOptions fbank_opts;
				+  fbank_opts.frame_opts.dither = 0;
				+  fbank_opts.frame_opts.snip_edges = false;
				+  fbank_opts.frame_opts.samp_freq = expected_sampling_rate;
				+  fbank_opts.mel_opts.num_bins = 80;
				+
				+  sherpa_ncnn::Recognizer recognizer(decoder_conf, model_conf, fbank_opts);
				+  sherpa_ncnn::Alsa alsa(device_name);
				+
				+  if (alsa.GetExpectedSampleRate() != expected_sampling_rate) {
				+    fprintf(stderr, "sample rate: %d != %d\n", alsa.GetExpectedSampleRate(),
				+            expected_sampling_rate);
				+    exit(-1);
				+  }
				+
				+  // Read 0.1 second of audio (at the device's sample rate) per iteration.
				+  int32_t chunk = 0.1 * alsa.GetActualSampleRate();
				+
				+  std::string last_text;
				+  int32_t segment_index = 0;
				+  while (!stop) {
				+    // Read() returns a reference to a buffer owned by alsa that stays
				+    // valid until the next Read(); bind to it instead of copying.
				+    const std::vector<float> &samples = alsa.Read(chunk);
				+
				+    recognizer.AcceptWaveform(expected_sampling_rate, samples.data(),
				+                              samples.size());
				+    recognizer.Decode();
				+    bool is_endpoint = recognizer.IsEndpoint();
				+    auto text = recognizer.GetResult().text;
				+
				+    if (!text.empty() && last_text != text) {
				+      last_text = text;
				+
				+      // If you want to display in lower case, please uncomment
				+      // the following two lines
				+      // std::transform(text.begin(), text.end(), text.begin(),
				+      //                [](auto c) { return std::tolower(c); });
				+
				+      fprintf(stderr, "%d: %s\n", segment_index, text.c_str());
				+    }
				+
				+    if (!text.empty() && is_endpoint) {
				+      ++segment_index;
				+    }
				+  }
				+
				+  // Printed here rather than in the signal handler (see Handler()).
				+  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
				+
				+  return 0;
				+}
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn-microphone.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn-microphone.cc
@@ -26,11 +26,12 @@
 
				 
			
 
				 bool stop = false;
			
 
				 
			
 
				-static int RecordCallback(const void *input_buffer, void * /*output_buffer*/,
			
 
				-                          unsigned long frames_per_buffer,  // NOLINT
			
 
				-                          const PaStreamCallbackTimeInfo * /*time_info*/,
			
 
				-                          PaStreamCallbackFlags /*status_flags*/,
			
 
				-                          void *user_data) {
			
 
				+static int32_t RecordCallback(const void *input_buffer,
			
 
				+                              void * /*output_buffer*/,
			
 
				+                              unsigned long frames_per_buffer,  // NOLINT
			
 
				+                              const PaStreamCallbackTimeInfo * /*time_info*/,
			
 
				+                              PaStreamCallbackFlags /*status_flags*/,
			
 
				+                              void *user_data) {
			
 
				   auto recognizer = reinterpret_cast<sherpa_ncnn::Recognizer *>(user_data);
			
 
				 
			
 
				   recognizer->AcceptWaveform(
			
@@ -39,12 +40,12 @@ static int RecordCallback(const void *input_buffer, void * /*output_buffer*/,
 
				   return stop ? paComplete : paContinue;
			
 
				 }
			
 
				 
			
 
				-static void Handler(int sig) {
			
 
				+static void Handler(int32_t sig) {
			
 
				   stop = true;
			
 
				-  fprintf(stderr, "\nexiting...\n");
			
 
				+  fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
			
 
				 };
			
 
				 
			
 
				-int main(int32_t argc, char *argv[]) {
			
 
				+int32_t main(int32_t argc, char *argv[]) {
			
 
				   if (argc < 8 || argc > 10) {
			
 
				     const char *usage = R"usage(
			
 
				 Usage:
			
@@ -77,7 +78,7 @@ for a list of pre-trained models to download.
 
				   model_conf.decoder_bin = argv[5];
			
 
				   model_conf.joiner_param = argv[6];
			
 
				   model_conf.joiner_bin = argv[7];
			
 
				-  int num_threads = 4;
			
 
				+  int32_t num_threads = 4;
			
 
				   if (argc >= 9 && atoi(argv[8]) > 0) {
			
 
				     num_threads = atoi(argv[8]);
			
 
				   }
			
@@ -152,11 +153,12 @@ for a list of pre-trained models to download.
 
				     exit(EXIT_FAILURE);
			
 
				   }
			
 
				 
			
 
				-  int num_tokens = 0;
			
 
				+  int32_t num_tokens = 0;
			
 
				   while (!stop) {
			
 
				     recognizer.Decode();
			
 
				     auto result = recognizer.GetResult();
			
 
				     if (result.text.size() != num_tokens) {
			
 
				+      num_tokens = result.text.size();
			
 
				       fprintf(stderr, "%s\n", result.text.c_str());
			
 
				     }
			
 
				 
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn.cc
@@ -24,7 +24,7 @@
 
				 #include "sherpa-ncnn/csrc/recognizer.h"
			
 
				 #include "sherpa-ncnn/csrc/wave-reader.h"
			
 
				 
			
 
				-int main(int argc, char *argv[]) {
			
 
				+int32_t main(int32_t argc, char *argv[]) {
			
 
				   if (argc < 9 || argc > 11) {
			
 
				     const char *usage = R"usage(
			
 
				 Usage:
			
@@ -54,7 +54,7 @@ for a list of pre-trained models to download.
 
				   model_conf.decoder_bin = argv[5];
			
 
				   model_conf.joiner_param = argv[6];
			
 
				   model_conf.joiner_bin = argv[7];
			
 
				-  int num_threads = 4;
			
 
				+  int32_t num_threads = 4;
			
 
				   if (argc >= 10 && atoi(argv[9]) > 0) {
			
 
				     num_threads = atoi(argv[9]);
			
 
				   }