2 年之前 · 1b15462ac6
--- a/ffmpeg-examples/sherpa-ncnn-ffmpeg.cc
+++ b/ffmpeg-examples/sherpa-ncnn-ffmpeg.cc
@@ -242,8 +242,8 @@ end:
 
				 static void FFmpegDecodeFrame(const AVFrame *frame,
			
 
				                               const sherpa_ncnn::Recognizer &recognizer,
			
 
				                               sherpa_ncnn::Stream *s,
			
 
				-                              sherpa_ncnn::Display &display,
			
 
				-                              std::string &last_text, int32_t &segment_index) {
			
 
				+                              sherpa_ncnn::Display *display,
			
 
				+                              std::string *last_text, int32_t *segment_index) {
			
 
				 #define N 3200  // 0.2 s. Sample rate is fixed to 16 kHz
			
 
				   static float samples[N];
			
 
				   static int32_t nb_samples = 0;
			
@@ -259,18 +259,18 @@ static void FFmpegDecodeFrame(const AVFrame *frame,
 
				     bool is_endpoint = recognizer.IsEndpoint(s);
			
 
				     auto text = recognizer.GetResult(s).text;
			
 
				 
			
 
				-    if (!text.empty() && last_text != text) {
			
 
				-      last_text = text;
			
 
				+    if (!text.empty() && *last_text != text) {
			
 
				+      *last_text = text;
			
 
				 
			
 
				       std::transform(text.begin(), text.end(), text.begin(),
			
 
				                      [](auto c) { return std::tolower(c); });
			
 
				 
			
 
				-      display.Print(segment_index, text);
			
 
				+      display->Print(*segment_index, text);
			
 
				     }
			
 
				 
			
 
				     if (is_endpoint) {
			
 
				       if (!text.empty()) {
			
 
				-        ++segment_index;
			
 
				+        (*segment_index)++;
			
 
				       }
			
 
				 
			
 
				       recognizer.Reset(s);
			
@@ -296,13 +296,16 @@ static void Handler(int32_t sig) {
 
				   raise(sig);
			
 
				 };
			
 
				 
			
 
				+#define SET_STRING_BY_ENV(config, key) \
			
 
				+  if (getenv(key)) {                   \
			
 
				+    config = getenv(key);              \
			
 
				+  }
			
 
				+
			
 
				 #define SET_CONFIG_BY_ENV(config, key, required) \
			
 
				   config = "";                                   \
			
 
				-  if (getenv(key)) {                             \
			
 
				-    config = getenv(key);                        \
			
 
				-    if (required) {                              \
			
 
				-      parsed_required_envs++;                    \
			
 
				-    }                                            \
			
 
				+  SET_STRING_BY_ENV(config, key);                \
			
 
				+  if (!(config).empty() && required) {           \
			
 
				+    parsed_required_envs++;                      \
			
 
				   }
			
 
				 
			
 
				 static int32_t ParseConfigFromENV(sherpa_ncnn::RecognizerConfig *config,
			
@@ -426,6 +429,48 @@ static int32_t OverwriteConfigByCLI(int32_t argc, char **argv,
 
				   return 0;
			
 
				 }
			
 
				 
			
 
				+// A simple display, without window support, doesn't rewrite current line.
			
 
				+// It only output the new text, which only works in greedy_search mode.
			
 
				+// It doesn't support modified_beam_search mode, which might change the
			
 
				+// generated text.
			
 
				+class SimpleDisplay : public sherpa_ncnn::Display {
			
 
				+ public:
			
 
				+  SimpleDisplay() {}
			
 
				+  void Print(int32_t segment_id, const std::string &s) {
			
 
				+    if (last_segment_ != segment_id) {
			
 
				+      last_segment_ = segment_id;
			
 
				+      last_text_ = "";
			
 
				+      fprintf(stderr, "\n%d:", segment_id);
			
 
				+    }
			
 
				+
			
 
				+    if (s.length() > last_text_.length()) {
			
 
				+      std::string tmp(s.begin() + last_text_.length(), s.end());
			
 
				+      fprintf(stderr, "%s", tmp.c_str());
			
 
				+    } else {
			
 
				+      fprintf(stderr, "%s", s.c_str());
			
 
				+    }
			
 
				+    last_text_ = s;
			
 
				+  }
			
 
				+
			
 
				+ private:
			
 
				+  std::string last_text_;
			
 
				+  int32_t last_segment_ = -1;
			
 
				+};
			
 
				+
			
 
				+std::unique_ptr<sherpa_ncnn::Display> CreateDisplay() {
			
 
				+  std::string val;
			
 
				+  SET_STRING_BY_ENV(val, "SHERPA_NCNN_SIMPLE_DISLAY");
			
 
				+
			
 
				+  std::transform(val.begin(), val.end(), val.begin(),
			
 
				+                 [](auto c) { return std::tolower(c); });
			
 
				+
			
 
				+  if (val == "on" || val == "true") {
			
 
				+    return std::make_unique<SimpleDisplay>();
			
 
				+  } else {
			
 
				+    return std::make_unique<sherpa_ncnn::Display>();
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 int32_t main(int32_t argc, char **argv) {
			
 
				   // Set the default values for config.
			
 
				   sherpa_ncnn::RecognizerConfig config;
			
@@ -468,6 +513,7 @@ Or configure by environment variables:
 
				   SHERPA_NCNN_RULE1_MIN_TRAILING_SILENCE=2.4 \
			
 
				   SHERPA_NCNN_RULE2_MIN_TRAILING_SILENCE=1.2 \
			
 
				   SHERPA_NCNN_RULE3_MIN_UTTERANCE_LENGTH=300 \
			
 
				+  SHERPA_NCNN_SIMPLE_DISLAY=on|off \
			
 
				   ./bin/sherpa-ncnn-ffmpeg
			
 
				 
			
 
				 Please refer to
			
@@ -515,7 +561,7 @@ for a list of pre-trained models to download.
 
				 
			
 
				   std::string last_text;
			
 
				   int32_t segment_index = 0;
			
 
				-  sherpa_ncnn::Display display;
			
 
				+  std::unique_ptr<sherpa_ncnn::Display> display = CreateDisplay();
			
 
				   while (1) {
			
 
				     if ((ret = av_read_frame(fmt_ctx, packet)) < 0) {
			
 
				       break;
			
@@ -557,8 +603,8 @@ for a list of pre-trained models to download.
 
				             if (ret < 0) {
			
 
				               exit(1);
			
 
				             }
			
 
				-            FFmpegDecodeFrame(filt_frame, recognizer, s.get(), display,
			
 
				-                              last_text, segment_index);
			
 
				+            FFmpegDecodeFrame(filt_frame, recognizer, s.get(), display.get(),
			
 
				+                              &last_text, &segment_index);
			
 
				             av_frame_unref(filt_frame);
			
 
				           }
			
 
				           av_frame_unref(frame);
			
@@ -583,7 +629,7 @@ for a list of pre-trained models to download.
 
				     last_text = text;
			
 
				     std::transform(text.begin(), text.end(), text.begin(),
			
 
				                    [](auto c) { return std::tolower(c); });
			
 
				-    display.Print(segment_index, text);
			
 
				+    display->Print(segment_index, text);
			
 
				   }
			
 
				 
			
 
				   avfilter_graph_free(&filter_graph);
			
--- a/sherpa-ncnn/csrc/display.h
+++ b/sherpa-ncnn/csrc/display.h
@@ -30,7 +30,7 @@ class Display {
 
				   explicit Display(int32_t max_word_per_line = 60)
			
 
				       : max_word_per_line_(max_word_per_line) {}
			
 
				 
			
 
				-  void Print(int32_t segment_id, const std::string &s) {
			
 
				+  virtual void Print(int32_t segment_id, const std::string &s) {
			
 
				 #ifdef _MSC_VER
			
 
				     if (segment_id != -1) {
			
 
				       if (last_segment_ != segment_id) {