
Support endpoint detection in Python (#56)

* Fix Python API
Fangjun Kuang, 2 years ago · commit 2e78bbf587

+ 11 - 11
README.md

@@ -15,6 +15,9 @@ We support all platforms that [ncnn](https://github.com/tencent/ncnn) supports.
 Everything can be compiled from source with static link. The generated
 executable depends only on system libraries.
 
+**HINT**: It does not depend on PyTorch or on any inference framework
+other than [ncnn](https://github.com/tencent/ncnn).
+
 Please see the documentation <https://k2-fsa.github.io/sherpa/ncnn/index.html>
 for installation and usages, e.g.,
 
@@ -24,21 +27,18 @@ for installation and usages, e.g.,
 We provide a few YouTube videos for demonstration about real-time speech recognition
 with `sherpa-ncnn` using a microphone:
 
-  - `English`: <https://www.youtube.com/watch?v=m6ynSxycpX0>
-  - `Chinese`: <https://www.youtube.com/watch?v=bbQfoRT75oM>
-  - `Chinese + English` Android demo: <https://www.youtube.com/shorts/S5Owcrb8vzU>
-  - `Chinese (with background noise)` Android demo : <https://www.youtube.com/shorts/KI1-d-W9uZw>
-  - `Chinese` Android demo : <https://www.youtube.com/shorts/lpDAG36T1R4>
-  - `Chinese poem with background music` Android demo : <https://www.youtube.com/shorts/5CJ-r8VNuwo>
-
-**Note**: If you don't have access to YouTube, we provide the links
-in bilibili below:
-
   - `English`: <https://www.bilibili.com/video/BV1TP411p7dh/>
   - `Chinese`: <https://www.bilibili.com/video/BV1214y177vu>
-  - `Chinese + English` Android demo: <https://www.bilibili.com/video/BV1Ge411A7XS>
+
+  - Multilingual (Chinese + English) with endpointing Python demo : <https://www.bilibili.com/video/BV1eK411y788/>
+
+  - **Android demos**
+
+  - Multilingual (Chinese + English) Android demo 1: <https://www.bilibili.com/video/BV1Ge411A7XS>
+  - Multilingual (Chinese + English) Android demo 2: <https://www.bilibili.com/video/BV1eK411y788/>
   - `Chinese (with background noise)` Android demo : <https://www.bilibili.com/video/BV1GR4y167fx>
   - `Chinese` Android demo : <https://www.bilibili.com/video/BV1744y1Z76H>
   - `Chinese poem with background music` Android demo : <https://www.bilibili.com/video/BV1vR4y1k7eo>
 
+
 See also <https://github.com/k2-fsa/sherpa>

+ 1 - 1
android/SherpaNcnn/app/src/main/java/com/k2fsa/sherpa/ncnn/MainActivity.kt

@@ -173,7 +173,7 @@ class MainActivity : AppCompatActivity() {
         model = SherpaNcnn(
             assetManager = application.assets,
             modelConfig = getModelConfig(type = 1, useGPU = useGPU)!!,
-            decoderConfig=getDecoderConfig(useEndpoint = true),
+            decoderConfig = getDecoderConfig(enableEndpoint = true),
             fbankConfig = getFbankConfig(),
         )
     }

+ 3 - 3
android/SherpaNcnn/app/src/main/java/com/k2fsa/sherpa/ncnn/SherpaNcnn.kt

@@ -17,7 +17,7 @@ data class EndpointConfig(
 data class DecoderConfig(
     var method: String = "modified_beam_search", // valid values: greedy_search, modified_beam_search
     var numActivePaths: Int = 4, // used only by modified_beam_search
-    var useEndpoint: Boolean = true,
+    var enableEndpoint: Boolean = true,
     var endpointConfig: EndpointConfig = EndpointConfig(),
 )
 
@@ -169,11 +169,11 @@ fun getModelConfig(type: Int, useGPU: Boolean): ModelConfig? {
     return null
 }
 
-fun getDecoderConfig(useEndpoint: Boolean): DecoderConfig {
+fun getDecoderConfig(enableEndpoint: Boolean): DecoderConfig {
     return DecoderConfig(
         method = "modified_beam_search",
         numActivePaths = 4,
-        useEndpoint = useEndpoint,
+        enableEndpoint = enableEndpoint,
         endpointConfig = EndpointConfig(
             rule1 = EndpointRule(false, 2.4f, 0.0f),
             rule2 = EndpointRule(true, 1.4f, 0.0f),

+ 12 - 0
python-api-examples/README.md

@@ -7,3 +7,15 @@ This file shows how to recognize a file.
 ## speech-recognition-from-microphone.py
 
 This file demonstrates how to do real-time speech recognition with a microphone.
+
+You can find video demos about this file at the following addresses:
+
+  - https://www.bilibili.com/video/BV1K44y197Fg/
+  - https://www.youtube.com/watch?v=74SxVueROok
+
+## speech-recognition-from-microphone-with-endpoint-detection.py
+
+Similar to `speech-recognition-from-microphone.py` but it also enables
+endpoint detection.
+
+You can find a video demo about this file at <https://www.bilibili.com/video/BV1eK411y788>.
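
> **Editor's note.** For quick reference, endpoint detection is switched on through extra keyword arguments of `sherpa_ncnn.Recognizer`. A minimal sketch, assuming the pre-trained conv-emformer model directory used by the examples (the paths are an assumption; substitute whatever model you downloaded):

```python
# Sketch: enabling endpoint detection in the Python API.
# The model directory is a placeholder; replace it with your own model.
import sherpa_ncnn

d = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"
recognizer = sherpa_ncnn.Recognizer(
    tokens=f"{d}/tokens.txt",
    encoder_param=f"{d}/encoder_jit_trace-pnnx.ncnn.param",
    encoder_bin=f"{d}/encoder_jit_trace-pnnx.ncnn.bin",
    decoder_param=f"{d}/decoder_jit_trace-pnnx.ncnn.param",
    decoder_bin=f"{d}/decoder_jit_trace-pnnx.ncnn.bin",
    joiner_param=f"{d}/joiner_jit_trace-pnnx.ncnn.param",
    joiner_bin=f"{d}/joiner_jit_trace-pnnx.ncnn.bin",
    num_threads=4,
    enable_endpoint_detection=True,  # disabled by default
)
```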

+ 80 - 0
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection.py

@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+
+# Real-time speech recognition from a microphone with sherpa-ncnn Python API
+# with endpoint detection.
+#
+# Please refer to
+# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
+# to download pre-trained models
+
+import sys
+
+try:
+    import sounddevice as sd
+except ImportError:
+    print("Please install sounddevice first. You can use")
+    print()
+    print("  pip install sounddevice")
+    print()
+    print("to install it")
+    sys.exit(-1)
+
+import sherpa_ncnn
+
+
+def create_recognizer():
+    # Please replace the model files if needed.
+    # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
+    # for download links.
+    recognizer = sherpa_ncnn.Recognizer(
+        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
+        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
+        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
+        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
+        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
+        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
+        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
+        num_threads=4,
+        decoding_method="modified_beam_search",
+        enable_endpoint_detection=True,
+        rule1_min_trailing_silence=2.4,
+        rule2_min_trailing_silence=1.2,
+        rule3_min_utterance_length=300,
+    )
+    return recognizer
+
+
+def main():
+    print("Started! Please speak")
+    recognizer = create_recognizer()
+    sample_rate = recognizer.sample_rate
+    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
+    last_result = ""
+    segment_id = 0
+    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
+        while True:
+            samples, _ = s.read(samples_per_read)  # a blocking read
+            samples = samples.reshape(-1)
+            recognizer.accept_waveform(sample_rate, samples)
+
+            is_endpoint = recognizer.is_endpoint
+
+            result = recognizer.text
+            if result and (last_result != result):
+                last_result = result
+                print(f"{segment_id}: {result}")
+
+            if result and is_endpoint:
+                segment_id += 1
+
+
+if __name__ == "__main__":
+    devices = sd.query_devices()
+    print(devices)
+    default_input_device_idx = sd.default.device[0]
+    print(f'Use default device: {devices[default_input_device_idx]["name"]}')
+
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\nCaught Ctrl + C. Exiting")

+ 1 - 1
python-api-examples/speech-recognition-from-microphone.py

@@ -42,7 +42,7 @@ def main():
     print("Started! Please speak")
     print("Started! Please speak")
     recognizer = create_recognizer()
     recognizer = create_recognizer()
     sample_rate = recognizer.sample_rate
     sample_rate = recognizer.sample_rate
-    samples_per_read = int(0.02 * sample_rate)  # 20ms
+    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
     last_result = ""
     last_result = ""
     with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
     with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
         while True:
         while True:
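
> **Editor's note.** This change reads 100 ms per blocking call instead of 20 ms, which reduces the per-iteration overhead of the Python loop with no noticeable latency cost. The arithmetic, assuming the 16 kHz sample rate the models in this commit use:

```python
# Samples fetched per blocking read, at a 16 kHz model sample rate.
print(int(0.02 * 16000))  # old: 320 samples (20 ms)
print(int(0.1 * 16000))   # new: 1600 samples (100 ms)
```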

+ 1 - 0
sherpa-ncnn/csrc/CPPLINT.cfg

@@ -0,0 +1 @@
+exclude_files=generate-int8-scale-table.cc

+ 5 - 0
sherpa-ncnn/csrc/endpoint.h

@@ -18,6 +18,7 @@
 #ifndef SHERPA_NCNN_CSRC_ENDPOINT_H_
 #define SHERPA_NCNN_CSRC_ENDPOINT_H_
 
+#include <string>
 #include <vector>
 
 namespace sherpa_ncnn {
@@ -54,6 +55,10 @@ struct EndpointConfig {
   EndpointRule rule2;
   EndpointRule rule3;
 
+  EndpointConfig(const EndpointRule &rule1, const EndpointRule &rule2,
+                 const EndpointRule &rule3)
+      : rule1(rule1), rule2(rule2), rule3(rule3) {}
+
   EndpointConfig()
       : rule1(false, 2.4, 0), rule2(true, 1.4, 0), rule3(false, 0, 20) {}
 

+ 2 - 2
sherpa-ncnn/csrc/greedy-search-decoder.cc

@@ -21,7 +21,7 @@
 
 namespace sherpa_ncnn {
 
-void GreedySearchDecoder::AcceptWaveform(const int32_t sample_rate,
+void GreedySearchDecoder::AcceptWaveform(const float sample_rate,
                                          const float *input_buffer,
                                          int32_t frames_per_buffer) {
   feature_extractor_.AcceptWaveform(sample_rate, input_buffer,
@@ -77,7 +77,7 @@ void GreedySearchDecoder::Decode() {
 
 RecognitionResult GreedySearchDecoder::GetResult() {
   auto ans = result_;
-  if (config_.use_endpoint && IsEndpoint()) {
+  if (config_.enable_endpoint && IsEndpoint()) {
     ResetResult();
     endpoint_start_frame_ = num_processed_;
   }

+ 1 - 1
sherpa-ncnn/csrc/greedy-search-decoder.h

@@ -51,7 +51,7 @@ class GreedySearchDecoder : public Decoder {
     decoder_out_ = model_->RunDecoder(decoder_input_);
   }
 
-  void AcceptWaveform(int32_t sample_rate, const float *input_buffer,
+  void AcceptWaveform(float sample_rate, const float *input_buffer,
                       int32_t frames_per_buffer) override;
 
   void Decode() override;

+ 2 - 2
sherpa-ncnn/csrc/modified-beam-search-decoder.cc

@@ -25,7 +25,7 @@
 
 namespace sherpa_ncnn {
 
-void ModifiedBeamSearchDecoder::AcceptWaveform(const int32_t sample_rate,
+void ModifiedBeamSearchDecoder::AcceptWaveform(const float sample_rate,
                                                const float *input_buffer,
                                                int32_t frames_per_buffer) {
   feature_extractor_.AcceptWaveform(sample_rate, input_buffer,
@@ -113,7 +113,7 @@ RecognitionResult ModifiedBeamSearchDecoder::GetResult() {
   result_.num_trailing_blanks = best_hyp.num_trailing_blanks;
   auto ans = result_;
 
-  if (config_.use_endpoint && IsEndpoint()) {
+  if (config_.enable_endpoint && IsEndpoint()) {
     ResetResult();
     endpoint_start_frame_ = num_processed_;
   }

+ 1 - 1
sherpa-ncnn/csrc/modified-beam-search-decoder.h

@@ -49,7 +49,7 @@ class ModifiedBeamSearchDecoder : public Decoder {
     ResetResult();
   }
 
-  void AcceptWaveform(int32_t sample_rate, const float *input_buffer,
+  void AcceptWaveform(float sample_rate, const float *input_buffer,
                       int32_t frames_per_buffer) override;
 
   void Decode() override;

+ 6 - 4
sherpa-ncnn/csrc/recognizer.cc

@@ -17,6 +17,8 @@
  * limitations under the License.
  */
 
+#include "sherpa-ncnn/csrc/recognizer.h"
+
 #include <memory>
 #include <string>
 #include <vector>
@@ -32,7 +34,7 @@ std::string DecoderConfig::ToString() const {
   os << "DecoderConfig(";
   os << "DecoderConfig(";
   os << "method=\"" << method << "\", ";
   os << "method=\"" << method << "\", ";
   os << "num_active_paths=" << num_active_paths << ", ";
   os << "num_active_paths=" << num_active_paths << ", ";
-  os << "use_endpoint=" << (use_endpoint ? "True" : "False") << ", ";
+  os << "enable_endpoint=" << (enable_endpoint ? "True" : "False") << ", ";
   os << "endpoint_config=" << endpoint_config.ToString() << ")";
   os << "endpoint_config=" << endpoint_config.ToString() << ")";
 
 
   return os.str();
   return os.str();
@@ -42,8 +44,8 @@ Recognizer::Recognizer(
 #if __ANDROID_API__ >= 9
     AAssetManager *mgr,
 #endif
-    const DecoderConfig decoder_conf, const ModelConfig model_conf,
-    const knf::FbankOptions fbank_opts)
+    const DecoderConfig &decoder_conf, const ModelConfig &model_conf,
+    const knf::FbankOptions &fbank_opts)
     :
 #if __ANDROID_API__ >= 9
       model_(Model::Create(mgr, model_conf)),
@@ -65,7 +67,7 @@ Recognizer::Recognizer(
   }
 }
 
-void Recognizer::AcceptWaveform(int32_t sample_rate, const float *input_buffer,
+void Recognizer::AcceptWaveform(float sample_rate, const float *input_buffer,
                                 int32_t frames_per_buffer) {
   decoder_->AcceptWaveform(sample_rate, input_buffer, frames_per_buffer);
 }

+ 16 - 5
sherpa-ncnn/csrc/recognizer.h

@@ -32,6 +32,7 @@
 
 namespace sherpa_ncnn {
 
+// TODO(fangjun): Add timestamps
 struct RecognitionResult {
   std::vector<int32_t> tokens;
   std::string text;
@@ -47,9 +48,19 @@ struct DecoderConfig {
 
   int32_t num_active_paths = 4;  // for modified beam search
 
-  bool use_endpoint = true;
+  bool enable_endpoint = false;
 
   EndpointConfig endpoint_config;
+
+  DecoderConfig() = default;
+
+  DecoderConfig(const std::string &method, int32_t num_active_paths,
+                bool enable_endpoint, const EndpointConfig &endpoint_config)
+      : method(method),
+        num_active_paths(num_active_paths),
+        enable_endpoint(enable_endpoint),
+        endpoint_config(endpoint_config) {}
+
   std::string ToString() const;
 };
 
@@ -57,7 +68,7 @@ class Decoder {
  public:
   virtual ~Decoder() = default;
 
-  virtual void AcceptWaveform(int32_t sample_rate, const float *input_buffer,
+  virtual void AcceptWaveform(float sample_rate, const float *input_buffer,
                               int32_t frames_per_buffer) = 0;
 
   virtual void Decode() = 0;
@@ -81,12 +92,12 @@ class Recognizer {
 #if __ANDROID_API__ >= 9
       AAssetManager *mgr,
 #endif
-      const DecoderConfig decoder_conf, const ModelConfig model_conf,
-      const knf::FbankOptions fbank_opts);
+      const DecoderConfig &decoder_conf, const ModelConfig &model_conf,
+      const knf::FbankOptions &fbank_opts);
 
   ~Recognizer() = default;
 
-  void AcceptWaveform(int32_t sample_rate, const float *input_buffer,
+  void AcceptWaveform(float sample_rate, const float *input_buffer,
                       int32_t frames_per_buffer);
 
   void Decode();

+ 2 - 2
sherpa-ncnn/jni/jni.cc

@@ -144,8 +144,8 @@ static DecoderConfig GetDecoderConfig(JNIEnv *env, jobject config) {
   fid = env->GetFieldID(cls, "numActivePaths", "I");
   fid = env->GetFieldID(cls, "numActivePaths", "I");
   decoder_config.num_active_paths = env->GetIntField(config, fid);
   decoder_config.num_active_paths = env->GetIntField(config, fid);
 
 
-  fid = env->GetFieldID(cls, "useEndpoint", "Z");
-  decoder_config.use_endpoint = env->GetBooleanField(config, fid);
+  fid = env->GetFieldID(cls, "enableEndpoint", "Z");
+  decoder_config.enable_endpoint = env->GetBooleanField(config, fid);
 
   fid = env->GetFieldID(cls, "endpointConfig",
                         "Lcom/k2fsa/sherpa/ncnn/EndpointConfig;");

+ 2 - 3
sherpa-ncnn/python/csrc/CMakeLists.txt

@@ -1,10 +1,9 @@
 
 include_directories(${PROJECT_SOURCE_DIR})
 set(srcs
-  decode.cc
-  features.cc
-  mat-util.cc
+  endpoint.cc
   model.cc
+  recognizer.cc
   sherpa-ncnn.cc
 )
 

+ 0 - 46
sherpa-ncnn/python/csrc/decode.cc

@@ -1,46 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "sherpa-ncnn/python/csrc/decode.h"
-
-#include "sherpa-ncnn/csrc/decode.h"
-#include "sherpa-ncnn/csrc/model.h"
-#include "sherpa-ncnn/python/csrc/mat-util.h"
-
-namespace sherpa_ncnn {
-
-static void PybindGreedySearch(py::module *m) {
-  m->def(
-      "greedy_search",
-      [](Model *model, py::array _encoder_out, py::array _decoder_out,
-         std::vector<int32_t> hyp)
-          -> std::pair<py::array, std::vector<int32_t>> {
-        ncnn::Mat encoder_out = ArrayToMat(_encoder_out);
-        ncnn::Mat decoder_out = ArrayToMat(_decoder_out);
-
-        GreedySearch(model, encoder_out, &decoder_out, &hyp);
-
-        return {MatToArray(decoder_out), hyp};
-      },
-      py::arg("model"), py::arg("encoder_out"), py::arg("decoder_out"),
-      py::arg("hyp"));
-}
-
-void PybindDecode(py::module *m) { PybindGreedySearch(m); }
-
-}  // namespace sherpa_ncnn

+ 114 - 0
sherpa-ncnn/python/csrc/endpoint.cc

@@ -0,0 +1,114 @@
+/**
+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sherpa-ncnn/python/csrc/endpoint.h"
+
+#include <memory>
+#include <string>
+
+#include "sherpa-ncnn/csrc/endpoint.h"
+
+namespace sherpa_ncnn {
+
+static constexpr const char *kEndpointRuleInitDoc = R"doc(
+Constructor for EndpointRule.
+
+Args:
+  must_contain_nonsilence:
+    If True, for this endpointing rule to apply there must be nonsilence in the
+    best-path traceback. For decoding, a non-blank token is considered as
+    non-silence.
+  min_trailing_silence:
+    This endpointing rule requires duration of trailing silence (in seconds)
+    to be ``>=`` this value.
+  min_utterance_length:
+    This endpointing rule requires utterance-length (in seconds) to
+    be ``>=`` this value.
+)doc";
+
+static constexpr const char *kEndpointConfigInitDoc = R"doc(
+If any rule in EndpointConfig is activated, it is said that an endpointing
+is detected.
+
+Args:
+  rule1:
+    By default, it times out after 2.4 seconds of silence, even if
+    we decoded nothing.
+  rule2:
+    By default, it times out after 1.2 seconds of silence after decoding
+    something.
+  rule3:
+    By default, it times out after the utterance is 20 seconds long, regardless of
+    anything else.
+)doc";
+
+static void PybindEndpointRule(py::module *m) {
+  using PyClass = EndpointRule;
+  py::class_<PyClass>(*m, "EndpointRule")
+      .def(py::init<bool, float, float>(), py::arg("must_contain_nonsilence"),
+           py::arg("min_trailing_silence"), py::arg("min_utterance_length"),
+           kEndpointRuleInitDoc)
+      .def("__str__", &PyClass::ToString)
+      .def_readwrite("must_contain_nonsilence",
+                     &PyClass::must_contain_nonsilence)
+      .def_readwrite("min_trailing_silence", &PyClass::min_trailing_silence)
+      .def_readwrite("min_utterance_length", &PyClass::min_utterance_length);
+}
+
+static void PybindEndpointConfig(py::module *m) {
+  using PyClass = EndpointConfig;
+  py::class_<PyClass>(*m, "EndpointConfig")
+      .def(
+          py::init(
+              [](float rule1_min_trailing_silence,
+                 float rule2_min_trailing_silence,
+                 float rule3_min_utterance_length) -> std::unique_ptr<PyClass> {
+                EndpointRule rule1(false, rule1_min_trailing_silence, 0);
+                EndpointRule rule2(true, rule2_min_trailing_silence, 0);
+                EndpointRule rule3(false, 0, rule3_min_utterance_length);
+
+                return std::make_unique<EndpointConfig>(rule1, rule2, rule3);
+              }),
+          py::arg("rule1_min_trailing_silence"),
+          py::arg("rule2_min_trailing_silence"),
+          py::arg("rule3_min_utterance_length"))
+      .def(py::init([](const EndpointRule &rule1, const EndpointRule &rule2,
+                       const EndpointRule &rule3) -> std::unique_ptr<PyClass> {
+             auto ans = std::make_unique<PyClass>();
+             ans->rule1 = rule1;
+             ans->rule2 = rule2;
+             ans->rule3 = rule3;
+             return ans;
+           }),
+           py::arg("rule1") = EndpointRule(false, 2.4, 0),
+           py::arg("rule2") = EndpointRule(true, 1.2, 0),
+           py::arg("rule3") = EndpointRule(false, 0, 20),
+           kEndpointConfigInitDoc)
+      .def("__str__",
+           [](const PyClass &self) -> std::string { return self.ToString(); })
+      .def_readwrite("rule1", &PyClass::rule1)
+      .def_readwrite("rule2", &PyClass::rule2)
+      .def_readwrite("rule3", &PyClass::rule3);
+}
+
+void PybindEndpoint(py::module *m) {
+  PybindEndpointRule(m);
+  PybindEndpointConfig(m);
+}
+
+}  // namespace sherpa_ncnn
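
> **Editor's note.** `PybindEndpointConfig` exposes two constructors: one taking three floats (the form the high-level wrapper uses) and one taking three `EndpointRule` objects. A sketch of the float form; it hard-codes `must_contain_nonsilence` to False/True/False for rules 1–3, exactly as the `py::init` lambda above does:

```python
# Sketch: the float overload of EndpointConfig builds the three rules
# internally, mirroring the py::init lambda in the bindings above.
from _sherpa_ncnn import EndpointConfig

config = EndpointConfig(
    rule1_min_trailing_silence=2.4,
    rule2_min_trailing_silence=1.2,
    rule3_min_utterance_length=20,
)
print(config.rule2.min_trailing_silence)  # 1.2
```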

+ 5 - 5
sherpa-ncnn/python/csrc/decode.h → sherpa-ncnn/python/csrc/endpoint.h

@@ -1,5 +1,5 @@
 /**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
  *
  * See LICENSE for clarification regarding multiple authors
  *
@@ -16,15 +16,15 @@
  * limitations under the License.
  */
 
-#ifndef SHERPA_NCNN_PYTHON_CSRC_DECODE_H_
-#define SHERPA_NCNN_PYTHON_CSRC_DECODE_H_
+#ifndef SHERPA_NCNN_PYTHON_CSRC_ENDPOINT_H_
+#define SHERPA_NCNN_PYTHON_CSRC_ENDPOINT_H_
 
 #include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
 
 namespace sherpa_ncnn {
 
-void PybindDecode(py::module *m);
+void PybindEndpoint(py::module *m);
 
 }  // namespace sherpa_ncnn
 
-#endif  // SHERPA_NCNN_PYTHON_CSRC_DECODE_H_
+#endif  // SHERPA_NCNN_PYTHON_CSRC_ENDPOINT_H_

+ 0 - 56
sherpa-ncnn/python/csrc/features.cc

@@ -1,56 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "sherpa-ncnn/csrc/features.h"
-
-#include "sherpa-ncnn/python/csrc/mat-util.h"
-#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
-
-namespace sherpa_ncnn {
-
-void PybindFeatures(py::module *m) {
-  using PyClass = FeatureExtractor;
-
-  py::class_<PyClass>(*m, "FeatureExtractor")
-      .def(py::init([](int32_t feature_dim,
-                       float sample_rate) -> std::unique_ptr<PyClass> {
-             knf::FbankOptions fbank_opts;
-             fbank_opts.frame_opts.dither = 0;
-             fbank_opts.frame_opts.snip_edges = false;
-             fbank_opts.frame_opts.samp_freq = sample_rate;
-             fbank_opts.mel_opts.num_bins = feature_dim;
-
-             return std::make_unique<PyClass>(fbank_opts);
-           }),
-           py::arg("feature_dim"), py::arg("sample_rate"))
-      .def("accept_waveform",
-           [](PyClass &self, float sample_rate, py::array_t<float> waveform) {
-             self.AcceptWaveform(sample_rate, waveform.data(), waveform.size());
-           })
-      .def("input_finished", &PyClass::InputFinished)
-      .def_property_readonly("num_frames_ready", &PyClass::NumFramesReady)
-      .def("is_last_frame", &PyClass::IsLastFrame, py::arg("frame"))
-      .def("get_frames",
-           [](PyClass &self, int32_t frame_index, int32_t n) -> py::array {
-             ncnn::Mat frames = self.GetFrames(frame_index, n);
-             return MatToArray(frames);
-           })
-      .def("reset", &PyClass::Reset);
-}
-
-}  // namespace sherpa_ncnn

+ 0 - 97
sherpa-ncnn/python/csrc/mat-util.cc

@@ -1,97 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "sherpa-ncnn/python/csrc/mat-util.h"
-
-namespace sherpa_ncnn {
-
-struct KeepMatAlive {
-  explicit KeepMatAlive(ncnn::Mat m) : m(m) {}
-
-  ncnn::Mat m;
-};
-
-py::array_t<float> MatToArray(ncnn::Mat m) {
-  std::vector<py::ssize_t> shape;
-  std::vector<py::ssize_t> strides;
-  if (m.dims == 1) {
-    shape.push_back(m.w);
-    strides.push_back(m.elemsize);
-  } else if (m.dims == 2) {
-    shape.push_back(m.h);
-    shape.push_back(m.w);
-    strides.push_back(m.w * m.elemsize);
-    strides.push_back(m.elemsize);
-  } else if (m.dims == 3) {
-    shape.push_back(m.c);
-    shape.push_back(m.h);
-    shape.push_back(m.w);
-    strides.push_back(m.cstep * m.elemsize);
-    strides.push_back(m.w * m.elemsize);
-    strides.push_back(m.elemsize);
-  } else if (m.dims == 4) {
-    shape.push_back(m.c);
-    shape.push_back(m.d);
-    shape.push_back(m.h);
-    shape.push_back(m.w);
-    strides.push_back(m.cstep * m.elemsize);
-    strides.push_back(m.w * m.h * m.elemsize);
-    strides.push_back(m.w * m.elemsize);
-    strides.push_back(m.elemsize);
-  }
-
-  auto keep_mat_alive = new KeepMatAlive(m);
-  py::capsule handle(keep_mat_alive, [](void *p) {
-    delete reinterpret_cast<KeepMatAlive *>(p);
-  });
-
-  return py::array_t<float>(shape, strides, (float *)m.data, handle);
-}
-
-ncnn::Mat ArrayToMat(py::array array) {
-  py::buffer_info info = array.request();
-  size_t elemsize = info.itemsize;
-
-  ncnn::Mat ans;
-
-  if (info.ndim == 1) {
-    ans = ncnn::Mat((int)info.shape[0], info.ptr, elemsize);
-  } else if (info.ndim == 2) {
-    ans = ncnn::Mat((int)info.shape[1], (int)info.shape[0], info.ptr, elemsize);
-  } else if (info.ndim == 3) {
-    ans = ncnn::Mat((int)info.shape[2], (int)info.shape[1], (int)info.shape[0],
-                    info.ptr, elemsize);
-
-    // in ncnn, buffer to construct ncnn::Mat need align to ncnn::alignSize
-    // with (w * h * elemsize, 16) / elemsize, but the buffer from numpy not
-    // so we set the cstep as numpy's cstep
-    ans.cstep = (int)info.shape[2] * (int)info.shape[1];
-  } else if (info.ndim == 4) {
-    ans = ncnn::Mat((int)info.shape[3], (int)info.shape[2], (int)info.shape[1],
-                    (int)info.shape[0], info.ptr, elemsize);
-
-    // in ncnn, buffer to construct ncnn::Mat need align to ncnn::alignSize
-    // with (w * h * d elemsize, 16) / elemsize, but the buffer from numpy not
-    // so we set the cstep as numpy's cstep
-    ans.cstep = (int)info.shape[3] * (int)info.shape[2] * (int)info.shape[1];
-  }
-
-  return ans;
-}
-
-}  // namespace sherpa_ncnn

+ 0 - 37
sherpa-ncnn/python/csrc/mat-util.h

@@ -1,37 +0,0 @@
-/**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
- *
- * See LICENSE for clarification regarding multiple authors
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SHERPA_NCNN_PYTHON_CSRC_MAT_UTIL_H_
-#define SHERPA_NCNN_PYTHON_CSRC_MAT_UTIL_H_
-
-#include "mat.h"
-#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
-
-namespace sherpa_ncnn {
-
-// Convert a ncnn::Mat to a numpy array. Data is shared.
-//
-// @param m It should be a float unpacked matrix
-py::array_t<float> MatToArray(ncnn::Mat m);
-
-// convert an array to a ncnn::Mat
-ncnn::Mat ArrayToMat(py::array array);
-
-}  // namespace sherpa_ncnn
-
-#endif  // SHERPA_NCNN_PYTHON_CSRC_MODEL_UTIL_H_

+ 7 - 62
sherpa-ncnn/python/csrc/model.cc

@@ -22,7 +22,6 @@
 #include <string>
 
 #include "sherpa-ncnn/csrc/model.h"
-#include "sherpa-ncnn/python/csrc/mat-util.h"
 
 
 namespace sherpa_ncnn {
 namespace sherpa_ncnn {
 
 
@@ -48,6 +47,8 @@ Args:
     Path to joiner.ncnn.bin.
   num_threads:
     Number of threads to use for neural network computation.
+  tokens:
+    Path to tokens.txt
 )doc";
 )doc";
 
 
 static void PybindModelConfig(py::module *m) {
 static void PybindModelConfig(py::module *m) {
@@ -58,8 +59,8 @@ static void PybindModelConfig(py::module *m) {
                        const std::string &decoder_param,
                        const std::string &decoder_bin,
                        const std::string &joiner_param,
-                       const std::string &joiner_bin,
-                       int32_t num_threads) -> std::unique_ptr<PyClass> {
+                       const std::string &joiner_bin, int32_t num_threads,
+                       const std::string &tokens) -> std::unique_ptr<PyClass> {
              auto ans = std::make_unique<PyClass>();
              ans->encoder_param = encoder_param;
              ans->encoder_bin = encoder_bin;
@@ -67,6 +68,7 @@ static void PybindModelConfig(py::module *m) {
              ans->decoder_bin = decoder_bin;
              ans->joiner_param = joiner_param;
              ans->joiner_bin = joiner_bin;
+             ans->tokens = tokens;
 
              ans->use_vulkan_compute = false;
 
@@ -79,66 +81,9 @@ static void PybindModelConfig(py::module *m) {
            py::arg("encoder_param"), py::arg("encoder_bin"),
            py::arg("decoder_param"), py::arg("decoder_bin"),
            py::arg("joiner_param"), py::arg("joiner_bin"),
-           py::arg("num_threads"), kModelConfigInitDoc);
+           py::arg("num_threads"), py::arg("tokens"), kModelConfigInitDoc);
 }
 
-void PybindModel(py::module *m) {
-  PybindModelConfig(m);
-
-  using PyClass = Model;
-  py::class_<PyClass>(*m, "Model")
-      .def_static("create", &PyClass::Create, py::arg("config"))
-      .def(
-          "run_encoder",
-          [](PyClass &self, py::array _features,
-             const std::vector<py::array> &_states)
-              -> std::pair<py::array, std::vector<py::array>> {
-            ncnn::Mat features = ArrayToMat(_features);
-
-            std::vector<ncnn::Mat> states;
-            states.reserve(_states.size());
-            for (const auto &s : _states) {
-              states.push_back(ArrayToMat(s));
-            }
-
-            ncnn::Mat encoder_out;
-            std::vector<ncnn::Mat> _next_states;
-
-            std::tie(encoder_out, _next_states) =
-                self.RunEncoder(features, states);
-
-            std::vector<py::array> next_states;
-            next_states.reserve(_next_states.size());
-            for (const auto &m : _next_states) {
-              next_states.push_back(MatToArray(m));
-            }
-
-            return std::make_pair(MatToArray(encoder_out), next_states);
-          },
-          py::arg("features"), py::arg("states"))
-      .def(
-          "run_decoder",
-          [](PyClass &self, py::array _decoder_input) -> py::array {
-            ncnn::Mat decoder_input = ArrayToMat(_decoder_input);
-            ncnn::Mat decoder_out = self.RunDecoder(decoder_input);
-            return MatToArray(decoder_out);
-          },
-          py::arg("decoder_input"))
-      .def(
-          "run_joiner",
-          [](PyClass &self, py::array _encoder_out,
-             py::array _decoder_out) -> py::array {
-            ncnn::Mat encoder_out = ArrayToMat(_encoder_out);
-            ncnn::Mat decoder_out = ArrayToMat(_decoder_out);
-            ncnn::Mat joiner_out = self.RunJoiner(encoder_out, decoder_out);
-
-            return MatToArray(joiner_out);
-          },
-          py::arg("encoder_out"), py::arg("decoder_out"))
-      .def_property_readonly("context_size", &PyClass::ContextSize)
-      .def_property_readonly("blank_id", &PyClass::BlankId)
-      .def_property_readonly("segment", &PyClass::Segment)
-      .def_property_readonly("offset", &PyClass::Offset);
-}
+void PybindModel(py::module *m) { PybindModelConfig(m); }
 
 }  // namespace sherpa_ncnn
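
> **Editor's note.** With this change `ModelConfig` also carries the path to `tokens.txt`, so the Python wrapper no longer has to parse the symbol table itself. A sketch of the updated constructor; all paths are placeholders:

```python
# Sketch: ModelConfig now takes the tokens file as well.
# Point the placeholder paths at a real pre-trained model.
from _sherpa_ncnn import ModelConfig

model_config = ModelConfig(
    encoder_param="encoder_jit_trace-pnnx.ncnn.param",
    encoder_bin="encoder_jit_trace-pnnx.ncnn.bin",
    decoder_param="decoder_jit_trace-pnnx.ncnn.param",
    decoder_bin="decoder_jit_trace-pnnx.ncnn.bin",
    joiner_param="joiner_jit_trace-pnnx.ncnn.param",
    joiner_bin="joiner_jit_trace-pnnx.ncnn.bin",
    num_threads=4,
    tokens="tokens.txt",
)
```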

+ 104 - 0
sherpa-ncnn/python/csrc/recognizer.cc

@@ -0,0 +1,104 @@
+/**
+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+ *
+ * See LICENSE for clarification regarding multiple authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "sherpa-ncnn/python/csrc/recognizer.h"
+
+#include <memory>
+#include <string>
+
+#include "sherpa-ncnn/csrc/recognizer.h"
+
+namespace sherpa_ncnn {
+
+static constexpr const char *kDecoderConfigInitDoc = R"doc(
+Constructor for DecoderConfig.
+
+Args:
+  method:
+    Decoding method. Supported values are: greedy_search, modified_beam_search.
+  num_active_paths:
+    Used only when method is modified_beam_search. It specifies the number of
+    actives paths during beam search.
+  enable_endpoint:
+    True to enable endpoint detection. False to disable endpoint detection.
+  endpoint_config:
+    Used only when ``enable_endpoint`` is True.
+)doc";
+
+static void PybindRecognitionResult(py::module *m) {
+  using PyClass = RecognitionResult;
+  py::class_<PyClass>(*m, "RecognitionResult")
+      .def_property_readonly(
+          "text", [](PyClass &self) -> std::string { return self.text; });
+}
+
+static void PybindDecoderConfig(py::module *m) {
+  using PyClass = DecoderConfig;
+  py::class_<PyClass>(*m, "DecoderConfig")
+      .def(py::init<const std::string &, int32_t, bool,
+                    const EndpointConfig &>(),
+           py::arg("method"), py::arg("num_active_paths"),
+           py::arg("enable_endpoint"), py::arg("endpoint_config"),
+           kDecoderConfigInitDoc)
+      .def("__str__", &PyClass::ToString)
+      .def_property_readonly("method",
+                             [](const PyClass &self) { return self.method; })
+      .def_property_readonly(
+          "num_active_paths",
+          [](const PyClass &self) { return self.num_active_paths; })
+      .def_property_readonly(
+          "enable_endpoint",
+          [](const PyClass &self) { return self.enable_endpoint; })
+      .def_property_readonly("endpoint_config", [](const PyClass &self) {
+        return self.endpoint_config;
+      });
+}
+
+void PybindRecognizer(py::module *m) {
+  PybindRecognitionResult(m);
+  PybindDecoderConfig(m);
+
+  using PyClass = Recognizer;
+  py::class_<PyClass>(*m, "Recognizer")
+      .def(py::init([](const DecoderConfig &decoder_config,
+                       const ModelConfig &model_config,
+                       float sample_rate = 16000) -> std::unique_ptr<PyClass> {
+             knf::FbankOptions fbank_opts;
+             fbank_opts.frame_opts.dither = 0;
+             fbank_opts.frame_opts.snip_edges = false;
+             fbank_opts.frame_opts.samp_freq = sample_rate;
+             fbank_opts.mel_opts.num_bins = 80;
+
+             return std::make_unique<PyClass>(decoder_config, model_config,
+                                              fbank_opts);
+           }),
+           py::arg("decoder_config"), py::arg("model_config"),
+           py::arg("sample_rate") = 16000)
+      .def("accept_waveform",
+           [](PyClass &self, float sample_rate, py::array_t<float> waveform) {
+             self.AcceptWaveform(sample_rate, waveform.data(), waveform.size());
+           })
+      .def("input_finished", &PyClass::InputFinished)
+      .def("decode", &PyClass::Decode)
+      .def_property_readonly("result",
+                             [](PyClass &self) { return self.GetResult(); })
+      .def("is_endpoint", &PyClass::IsEndpoint)
+      .def("reset", &PyClass::Reset);
+}
+
+}  // namespace sherpa_ncnn
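
> **Editor's note.** Put together, the new bindings let you drive recognition from Python without the old Model/FeatureExtractor/greedy_search plumbing. A minimal sketch against the raw `_sherpa_ncnn` module, with placeholder model paths; the high-level `sherpa_ncnn.Recognizer` wrapper below does the same thing with a friendlier signature:

```python
# Sketch: the low-level _sherpa_ncnn API added by this commit.
import numpy as np
from _sherpa_ncnn import DecoderConfig, EndpointConfig, ModelConfig, Recognizer

model_config = ModelConfig(
    encoder_param="encoder.ncnn.param",  # placeholder paths
    encoder_bin="encoder.ncnn.bin",
    decoder_param="decoder.ncnn.param",
    decoder_bin="decoder.ncnn.bin",
    joiner_param="joiner.ncnn.param",
    joiner_bin="joiner.ncnn.bin",
    num_threads=4,
    tokens="tokens.txt",
)
decoder_config = DecoderConfig(
    method="greedy_search",
    num_active_paths=4,
    enable_endpoint=True,
    endpoint_config=EndpointConfig(),  # default rules
)
recognizer = Recognizer(
    decoder_config=decoder_config,
    model_config=model_config,
    sample_rate=16000,
)

recognizer.accept_waveform(16000, np.zeros(1600, dtype=np.float32))
recognizer.decode()
print(recognizer.result.text, recognizer.is_endpoint())
```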

+ 5 - 5
sherpa-ncnn/python/csrc/features.h → sherpa-ncnn/python/csrc/recognizer.h

@@ -1,5 +1,5 @@
 /**
- * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
+ * Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
  *
  * See LICENSE for clarification regarding multiple authors
  *
@@ -16,15 +16,15 @@
  * limitations under the License.
  */
 
-#ifndef SHERPA_NCNN_PYTHON_CSRC_FEATURES_H_
-#define SHERPA_NCNN_PYTHON_CSRC_FEATURES_H_
+#ifndef SHERPA_NCNN_PYTHON_CSRC_RECOGNIZER_H_
+#define SHERPA_NCNN_PYTHON_CSRC_RECOGNIZER_H_
 
 #include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
 
 namespace sherpa_ncnn {
 
-void PybindFeatures(py::module *m);
+void PybindRecognizer(py::module *m);
 
 }  // namespace sherpa_ncnn
 
-#endif  // SHERPA_NCNN_PYTHON_CSRC_FEATURES_H_
+#endif  // SHERPA_NCNN_PYTHON_CSRC_RECOGNIZER_H_

+ 4 - 6
sherpa-ncnn/python/csrc/sherpa-ncnn.cc

@@ -18,20 +18,18 @@
 
 #include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
 
-#include "sherpa-ncnn/python/csrc/decode.h"
-#include "sherpa-ncnn/python/csrc/features.h"
+#include "sherpa-ncnn/python/csrc/endpoint.h"
 #include "sherpa-ncnn/python/csrc/model.h"
 #include "sherpa-ncnn/python/csrc/model.h"
+#include "sherpa-ncnn/python/csrc/recognizer.h"
 
 
 namespace sherpa_ncnn {
 namespace sherpa_ncnn {
 
 
 PYBIND11_MODULE(_sherpa_ncnn, m) {
 PYBIND11_MODULE(_sherpa_ncnn, m) {
   m.doc() = "pybind11 binding of sherpa-ncnn";
   m.doc() = "pybind11 binding of sherpa-ncnn";
 
 
+  PybindEndpoint(&m);
   PybindModel(&m);
-
-  PybindFeatures(&m);
-
-  PybindDecode(&m);
+  PybindRecognizer(&m);
 }
 
 }  // namespace sherpa_ncnn

+ 0 - 2
sherpa-ncnn/python/sherpa_ncnn/__init__.py

@@ -1,3 +1 @@
-from _sherpa_ncnn import FeatureExtractor, Model, ModelConfig, greedy_search
-
 from .recognizer import Recognizer

+ 66 - 52
sherpa-ncnn/python/sherpa_ncnn/recognizer.py

@@ -1,24 +1,19 @@
 from pathlib import Path
 
 import numpy as np
-from _sherpa_ncnn import FeatureExtractor, Model, ModelConfig, greedy_search
+from _sherpa_ncnn import (
+    DecoderConfig,
+    EndpointConfig,
+    EndpointRule,
+    ModelConfig,
+)
+from _sherpa_ncnn import Recognizer as _Recognizer
 
 
 def _assert_file_exists(f: str):
     assert Path(f).is_file(), f"{f} does not exist"
 
 
-def _read_tokens(tokens):
-    sym_table = {}
-    with open(tokens, "r", encoding="utf-8") as f:
-        for line in f:
-            sym, i = line.split()
-            sym = sym.replace("▁", " ")
-            sym_table[int(i)] = sym
-
-    return sym_table
-
-
 class Recognizer(object):
     """A class for streaming speech recognition.
 
@@ -88,6 +83,12 @@ class Recognizer(object):
         joiner_param: str,
         joiner_bin: str,
         num_threads: int = 4,
+        decoding_method: str = "greedy_search",
+        num_active_paths: int = 4,
+        enable_endpoint_detection: bool = False,
+        rule1_min_trailing_silence: float = 2.4,
+        rule2_min_trailing_silence: float = 1.2,
+        rule3_min_utterance_length: float = 20,
     ):
         """
         Please refer to
@@ -101,6 +102,7 @@ class Recognizer(object):
             columns::
 
                 symbol integer_id
+
           encoder_param:
             Path to ``encoder.ncnn.param``.
           encoder_bin:
           encoder_bin:
@@ -115,6 +117,28 @@ class Recognizer(object):
             Path to ``joiner.ncnn.bin``.
             Path to ``joiner.ncnn.bin``.
           num_threads:
           num_threads:
             Number of threads for neural network computation.
             Number of threads for neural network computation.
+          decoding_method:
+            Valid decoding methods are: greedy_search, modified_beam_search.
+          num_active_paths:
+            Used only when decoding_method is modified_beam_search. Its value
+            is ignored when decoding_method is greedy_search. It specifies
+            the maximum number of paths to use in beam search.
+          enable_endpoint_detection:
+            True to enable endpoint detection. False to disable endpoint
+            detection.
+          rule1_min_trailing_silence:
+            Used only when enable_endpoint_detection is True. If the duration
+            of trailing silence in seconds is larger than this value, we assume
+            an endpoint is detected.
+          rule2_min_trailing_silence:
+            Used only when enable_endpoint_detection is True. If we have decoded
+            something that is nonsilence and if the duration of trailing silence
+            in seconds is larger than this value, we assume an endpoint is
+            detected.
+          rule3_min_utterance_length:
+            Used only when enable_endpoint_detection is True. If the utterance
+            length in seconds is larger than this value, we assume an endpoint
+            is detected.
         """
         """
         _assert_file_exists(tokens)
         _assert_file_exists(tokens)
         _assert_file_exists(encoder_param)
         _assert_file_exists(encoder_param)
@@ -125,8 +149,10 @@ class Recognizer(object):
         _assert_file_exists(joiner_bin)
         _assert_file_exists(joiner_bin)
 
 
         assert num_threads > 0, num_threads
         assert num_threads > 0, num_threads
-
-        self.sym_table = _read_tokens(tokens)
+        assert decoding_method in (
+            "greedy_search",
+            "modified_beam_search",
+        ), decoding_method
 
 
         model_config = ModelConfig(
         model_config = ModelConfig(
             encoder_param=encoder_param,
             encoder_param=encoder_param,
@@ -136,23 +162,30 @@ class Recognizer(object):
             joiner_param=joiner_param,
             joiner_param=joiner_param,
             joiner_bin=joiner_bin,
             joiner_bin=joiner_bin,
             num_threads=num_threads,
             num_threads=num_threads,
+            tokens=tokens,
         )
 
-        self.model = Model.create(model_config)
-        self.sample_rate = 16000
-
-        self.feature_extractor = FeatureExtractor(
-            feature_dim=80,
-            sample_rate=self.sample_rate,
+        endpoint_config = EndpointConfig(
+            rule1_min_trailing_silence=rule1_min_trailing_silence,
+            rule2_min_trailing_silence=rule2_min_trailing_silence,
+            rule3_min_utterance_length=rule3_min_utterance_length,
         )
 
-        self.num_processed = 0  # number of processed feature frames so far
-        self.states = []  # model state
+        decoder_config = DecoderConfig(
+            method=decoding_method,
+            num_active_paths=num_active_paths,
+            enable_endpoint=enable_endpoint_detection,
+            endpoint_config=endpoint_config,
+        )
 
-        self.hyp = [0] * self.model.context_size  # initial hypothesis
+        # all of our current models are using 16 kHz audio samples
+        self.sample_rate = 16000
 
-        decoder_input = np.array(self.hyp, dtype=np.int32)
-        self.decoder_out = self.model.run_decoder(decoder_input)
+        self.recognizer = _Recognizer(
+            decoder_config=decoder_config,
+            model_config=model_config,
+            sample_rate=self.sample_rate,
+        )
 
     def accept_waveform(self, sample_rate: float, waveform: np.array):
         """Decode audio samples.
@@ -165,37 +198,18 @@ class Recognizer(object):
             range ``[-1, 1]``.
         """
         assert sample_rate == self.sample_rate, (sample_rate, self.sample_rate)
-        self.feature_extractor.accept_waveform(sample_rate, waveform)
-
-        self._decode()
+        self.recognizer.accept_waveform(sample_rate, waveform)
+        self.recognizer.decode()
 
     def input_finished(self):
         """Signal that no more audio samples are available."""
-        self.feature_extractor.input_finished()
-        self._decode()
+        self.recognizer.input_finished()
+        self.recognizer.decode()
 
     @property
     def text(self):
-        context_size = self.model.context_size
-        text = [self.sym_table[token] for token in self.hyp[context_size:]]
-        return "".join(text)
-
-    def _decode(self):
-        segment = self.model.segment
-        offset = self.model.offset
-
-        while self.feature_extractor.num_frames_ready - self.num_processed >= segment:
-            features = self.feature_extractor.get_frames(self.num_processed, segment)
-            self.num_processed += offset
+        return self.recognizer.result.text
 
-            encoder_out, self.states = self.model.run_encoder(
-                features=features,
-                states=self.states,
-            )
-
-            self.decoder_out, self.hyp = greedy_search(
-                model=self.model,
-                encoder_out=encoder_out,
-                decoder_out=self.decoder_out,
-                hyp=self.hyp,
-            )
+    @property
+    def is_endpoint(self):
+        return self.recognizer.is_endpoint()
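
> **Editor's note.** For completeness, an end-to-end sketch of the reworked high-level wrapper with endpoint detection. Model paths are placeholders, and the silent buffer stands in for real audio, so no text will actually be produced:

```python
# Sketch: the reworked sherpa_ncnn.Recognizer, fed chunk by chunk.
import numpy as np
import sherpa_ncnn

recognizer = sherpa_ncnn.Recognizer(
    tokens="tokens.txt",  # placeholder paths
    encoder_param="encoder.ncnn.param",
    encoder_bin="encoder.ncnn.bin",
    decoder_param="decoder.ncnn.param",
    decoder_bin="decoder.ncnn.bin",
    joiner_param="joiner.ncnn.param",
    joiner_bin="joiner.ncnn.bin",
    num_threads=4,
    decoding_method="modified_beam_search",
    enable_endpoint_detection=True,
)

samples = np.zeros(16000, dtype=np.float32)  # stand-in for 1 s of audio
for off in range(0, len(samples), 1600):  # 100 ms chunks
    recognizer.accept_waveform(recognizer.sample_rate, samples[off : off + 1600])
    if recognizer.text and recognizer.is_endpoint:
        print("segment:", recognizer.text)
recognizer.input_finished()
print("final:", recognizer.text)
```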