2 år sedan · 1c578309b7
--- a/sherpa-ncnn/csrc/alsa.cc
+++ b/sherpa-ncnn/csrc/alsa.cc
@@ -20,6 +20,8 @@
 
				 
			
 
				 #include "sherpa-ncnn/csrc/alsa.h"
			
 
				 
			
 
				+#include <algorithm>
			
 
				+
			
 
				 #include "alsa/asoundlib.h"
			
 
				 
			
 
				 namespace sherpa_ncnn {
			
--- a/sherpa-ncnn/csrc/alsa.h
+++ b/sherpa-ncnn/csrc/alsa.h
@@ -16,8 +16,8 @@
 
				  * limitations under the License.
			
 
				  */
			
 
				 
			
 
				-#ifndef SHERPA_NCNN_CSRC_MICROPHONE_H_
			
 
				-#define SHERPA_NCNN_CSRC_MICROPHONE_H_
			
 
				+#ifndef SHERPA_NCNN_CSRC_ALSA_H_
			
 
				+#define SHERPA_NCNN_CSRC_ALSA_H_
			
 
				 
			
 
				 #include <memory>
			
 
				 #include <vector>
			
@@ -57,4 +57,4 @@ class Alsa {
 
				 
			
 
				 }  // namespace sherpa_ncnn
			
 
				 
			
 
				-#endif  // SHERPA_NCNN_CSRC_MICROPHONE_H_
			
 
				+#endif  // SHERPA_NCNN_CSRC_ALSA_H_
			
--- a/sherpa-ncnn/csrc/display.h
+++ b/sherpa-ncnn/csrc/display.h
@@ -0,0 +1,92 @@
 
				+/**
			
 
				+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
			
 
				+ *
			
 
				+ * See LICENSE for clarification regarding multiple authors
			
 
				+ *
			
 
				+ * Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+ * you may not use this file except in compliance with the License.
			
 
				+ * You may obtain a copy of the License at
			
 
				+ *
			
 
				+ *     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+ *
			
 
				+ * Unless required by applicable law or agreed to in writing, software
			
 
				+ * distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+ * See the License for the specific language governing permissions and
			
 
				+ * limitations under the License.
			
 
				+ */
			
 
				+
			
 
				+#ifndef SHERPA_NCNN_CSRC_DISPLAY_H_
			
 
				+#define SHERPA_NCNN_CSRC_DISPLAY_H_
			
 
				+#include <stdio.h>
			
 
				+
			
 
				+#include <string>
			
 
				+
			
 
				+namespace sherpa_ncnn {
			
 
				+
			
 
				/// Prints streaming recognition results to stderr.
				///
				/// Successive calls to Print() with the same segment id overwrite the text
				/// previously shown for that segment (using ANSI escape sequences), so a
				/// partial result is refreshed in place. A call with a different segment id
				/// starts a new line. When compiled with MSVC, each call simply prints a
				/// new line without any cursor control.
				class Display {
				 public:
				  /// Returns the length in bytes of the UTF-8 sequence starting at s[pos].
				  ///
				  /// The length is derived from the lead byte (1 to 4 bytes) and is then
				  /// limited to the continuation bytes (10xxxxxx) actually present, so the
				  /// result never extends past the end of `s` and never swallows a byte
				  /// that is not a continuation byte. A stray continuation byte or an
				  /// invalid lead byte counts as a 1-byte sequence.
				  ///
				  /// @param s    The text being scanned.
				  /// @param pos  Byte offset of the first byte of the sequence.
				  /// @return     Number of bytes in the sequence; 0 if pos >= s.size().
				  static size_t Utf8CharLength(const std::string &s, size_t pos) {
				    if (pos >= s.size()) {
				      return 0;
				    }

				    const unsigned char lead = static_cast<unsigned char>(s[pos]);

				    size_t expected = 1;
				    if ((lead & 0xE0) == 0xC0) {
				      expected = 2;
				    } else if ((lead & 0xF0) == 0xE0) {
				      expected = 3;
				    } else if ((lead & 0xF8) == 0xF0) {
				      expected = 4;
				    }

				    size_t len = 1;
				    while (len < expected && pos + len < s.size() &&
				           (static_cast<unsigned char>(s[pos + len]) & 0xC0) == 0x80) {
				      ++len;
				    }

				    return len;
				  }

				  /// Shows `s` as the current text of segment `segment_id` on stderr.
				  ///
				  /// @param segment_id  Index printed in front of the text. If it equals the
				  ///                    id of the previous call, the previous output is
				  ///                    erased first; otherwise a new line is started.
				  /// @param s           Text to display. It is emitted one UTF-8 character
				  ///                    at a time; once max_word_per_line_ characters have
				  ///                    been written, the line is wrapped at the next
				  ///                    space.
				  void Print(int32_t segment_id, const std::string &s) {
				#ifdef _MSC_VER
				    fprintf(stderr, "%d:%s\n", static_cast<int>(segment_id), s.c_str());
				#else
				    if (last_segment_ == segment_id) {
				      Clear();
				    } else {
				      if (last_segment_ != -1) {
				        fprintf(stderr, "\n\r");
				      }
				      last_segment_ = segment_id;
				      num_previous_lines_ = 0;
				    }

				    fprintf(stderr, "\r%d:", static_cast<int>(segment_id));

				    int32_t chars_on_line = 0;
				    for (size_t n = 0; n < s.size();) {
				      // n < s.size() here, so the length is always at least 1 and the
				      // loop is guaranteed to make progress.
				      const size_t len = Utf8CharLength(s, n);
				      fwrite(s.data() + n, 1, len, stderr);
				      n += len;

				      ++chars_on_line;
				      // Wrap only at a space, and only if more text follows it.
				      if (chars_on_line >= max_word_per_line_ && n + 1 < s.size() &&
				          s[n] == ' ') {
				        fprintf(stderr, "\n\r ");
				        ++num_previous_lines_;
				        chars_on_line = 0;
				      }
				    }
				#endif
				  }

				 private:
				  // Clear the output for the current segment, including every wrapped
				  // line that was emitted for it.
				  void Clear() {
				    ClearCurrentLine();
				    while (num_previous_lines_ > 0) {
				      GoUpOneLine();
				      ClearCurrentLine();
				      --num_previous_lines_;
				    }
				  }

				  // Clear the current line and return the cursor to its first column
				  void ClearCurrentLine() const { fprintf(stderr, "\033[2K\r"); }

				  // Move the cursor to the previous line
				  void GoUpOneLine() const { fprintf(stderr, "\033[1A\r"); }

				  // Number of characters written before a wrap at the next space
				  int32_t max_word_per_line_ = 80;
				  // Number of wrapped lines emitted so far for the current segment
				  int32_t num_previous_lines_ = 0;
				  // Segment id of the most recent Print() call; -1 before the first call
				  int32_t last_segment_ = -1;
				};
			
 
				+
			
 
				+}  // namespace sherpa_ncnn
			
 
				+
			
 
				+#endif  // SHERPA_NCNN_CSRC_DISPLAY_H_
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc
@@ -23,6 +23,7 @@
 
				 #include <cstdint>
			
 
				 
			
 
				 #include "sherpa-ncnn/csrc/alsa.h"
			
 
				+#include "sherpa-ncnn/csrc/display.h"
			
 
				 #include "sherpa-ncnn/csrc/recognizer.h"
			
 
				 
			
 
				 bool stop = false;
			
@@ -113,7 +114,7 @@ as the device_name.
 
				 
			
 
				   sherpa_ncnn::EndpointConfig endpoint_config;
			
 
				   endpoint_config.rule1.min_trailing_silence = 2.4;
			
 
				-  endpoint_config.rule2.min_trailing_silence = 1.2;
			
 
				+  endpoint_config.rule2.min_trailing_silence = 0.8;  // <--tune this value !
			
 
				   endpoint_config.rule3.min_utterance_length = 300;
			
 
				 
			
 
				   decoder_conf.endpoint_config = endpoint_config;
			
@@ -141,6 +142,7 @@ as the device_name.
 
				 
			
 
				   std::string last_text;
			
 
				   int32_t segment_index = 0;
			
 
				+  sherpa_ncnn::Display display;
			
 
				   while (!stop) {
			
 
				     const std::vector<float> samples = alsa.Read(chunk);
			
 
				 
			
@@ -153,12 +155,10 @@ as the device_name.
 
				     if (!text.empty() && last_text != text) {
			
 
				       last_text = text;
			
 
				 
			
 
				-      // If you want to display in lower case, please uncomment
			
 
				-      // the following two lines
			
 
				-      // std::transform(text.begin(), text.end(), text.begin(),
			
 
				-      //                [](auto c) { return std::tolower(c); });
			
 
				+      std::transform(text.begin(), text.end(), text.begin(),
			
 
				+                     [](auto c) { return std::tolower(c); });
			
 
				 
			
 
				-      fprintf(stderr, "%d: %s\n", segment_index, text.c_str());
			
 
				+      display.Print(segment_index, text);
			
 
				     }
			
 
				 
			
 
				     if (!text.empty() && is_endpoint) {
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn-microphone.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn-microphone.cc
@@ -21,6 +21,7 @@
 
				 #include <stdlib.h>
			
 
				 
			
 
				 #include "portaudio.h"  // NOLINT
			
 
				+#include "sherpa-ncnn/csrc/display.h"
			
 
				 #include "sherpa-ncnn/csrc/microphone.h"
			
 
				 #include "sherpa-ncnn/csrc/recognizer.h"
			
 
				 
			
@@ -97,6 +98,18 @@ for a list of pre-trained models to download.
 
				       decoder_conf.method = method;
			
 
				     }
			
 
				   }
			
 
				+
			
 
				+  decoder_conf.enable_endpoint = true;
			
 
				+
			
 
				+  sherpa_ncnn::EndpointConfig endpoint_config;
			
 
				+  endpoint_config.rule1.min_trailing_silence = 2.4;
			
 
				+  endpoint_config.rule2.min_trailing_silence = 0.8;  // <--tune this value !
			
 
				+  endpoint_config.rule3.min_utterance_length = 300;
			
 
				+
			
 
				+  decoder_conf.endpoint_config = endpoint_config;
			
 
				+
			
 
				+  fprintf(stderr, "%s\n", decoder_conf.ToString().c_str());
			
 
				+
			
 
				   knf::FbankOptions fbank_opts;
			
 
				   fbank_opts.frame_opts.dither = 0;
			
 
				   fbank_opts.frame_opts.snip_edges = false;
			
@@ -153,13 +166,26 @@ for a list of pre-trained models to download.
 
				     exit(EXIT_FAILURE);
			
 
				   }
			
 
				 
			
 
				-  int32_t num_tokens = 0;
			
 
				+  std::string last_text;
			
 
				+  int32_t segment_index = 0;
			
 
				+  sherpa_ncnn::Display display;
			
 
				   while (!stop) {
			
 
				     recognizer.Decode();
			
 
				-    auto result = recognizer.GetResult();
			
 
				-    if (result.text.size() != num_tokens) {
			
 
				-      num_tokens = result.text.size();
			
 
				-      fprintf(stderr, "%s\n", result.text.c_str());
			
 
				+
			
 
				+    bool is_endpoint = recognizer.IsEndpoint();
			
 
				+    auto text = recognizer.GetResult().text;
			
 
				+
			
 
				+    if (!text.empty() && last_text != text) {
			
 
				+      last_text = text;
			
 
				+
			
 
				+      std::transform(text.begin(), text.end(), text.begin(),
			
 
				+                     [](auto c) { return std::tolower(c); });
			
 
				+
			
 
				+      display.Print(segment_index, text);
			
 
				+    }
			
 
				+
			
 
				+    if (!text.empty() && is_endpoint) {
			
 
				+      ++segment_index;
			
 
				     }
			
 
				 
			
 
				     Pa_Sleep(20);  // sleep for 20ms
			
--- a/sherpa-ncnn/csrc/sherpa-ncnn.cc
+++ b/sherpa-ncnn/csrc/sherpa-ncnn.cc
@@ -20,7 +20,7 @@
 
				 #include <stdio.h>
			
 
				 
			
 
				 #include <algorithm>
			
 
				-#include <chrono>
			
 
				+#include <chrono>  // NOLINT
			
 
				 #include <iostream>
			
 
				 
			
 
				 #include "net.h"  // NOLINT