2 жил өмнө · b2d47bcf16
--- a/python-api-examples/README.md
+++ b/python-api-examples/README.md
@@ -0,0 +1,9 @@
 
				+# Introduction
			
 
				+
			
 
				+## decode-file.py
			
 
				+
			
 
				+This file shows how to recognize a single wave file (mono, 16-bit samples) with a pre-trained model.
			
 
				+
			
 
				+## speech-recognition-from-microphone.py
			
 
				+
			
 
				+This file demonstrates how to do real-time speech recognition with a microphone.
			
--- a/python-api-examples/decode-file.py
+++ b/python-api-examples/decode-file.py
@@ -0,0 +1,57 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+"""
			
 
				+This file demonstrates how to use sherpa-ncnn Python API to recognize
			
 
				+a single file.
			
 
				+
			
 
				+Please refer to
			
 
				+https://k2-fsa.github.io/sherpa/ncnn/index.html
			
 
				+to install sherpa-ncnn and to download the pre-trained models
			
 
				+used in this file.
			
 
				+"""
			
 
				+
			
 
				+import wave
			
 
				+
			
 
				+import numpy as np
			
 
				+import sherpa_ncnn
			
 
				+
			
 
				+
			
 
				+def main():
				+    """Recognize one wave file with a pre-trained model and print the text.
				+
				+    The model files and the test wave are read from
				+    ./sherpa-ncnn-conv-emformer-transducer-2022-12-06.
				+
				+    Raises:
				+      ValueError: If the wave file is not mono, does not use 16-bit
				+        samples, or its sample rate differs from recognizer.sample_rate.
				+    """
				+    model_dir = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"
				+    recognizer = sherpa_ncnn.Recognizer(
				+        tokens=f"{model_dir}/tokens.txt",
				+        encoder_param=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.param",
				+        encoder_bin=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.bin",
				+        decoder_param=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.param",
				+        decoder_bin=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.bin",
				+        joiner_param=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.param",
				+        joiner_bin=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.bin",
				+        num_threads=4,
				+    )
				+
				+    filename = f"{model_dir}/test_wavs/1.wav"
				+    with wave.open(filename) as f:
				+        # Raise explicit errors instead of using assert: assert statements
				+        # are removed when Python runs with -O, which would silently skip
				+        # these checks.
				+        if f.getframerate() != recognizer.sample_rate:
				+            raise ValueError(
				+                f"{filename}: expected sample rate "
				+                f"{recognizer.sample_rate}, got {f.getframerate()}"
				+            )
				+        if f.getnchannels() != 1:
				+            raise ValueError(
				+                f"{filename}: expected 1 channel, got {f.getnchannels()}"
				+            )
				+        if f.getsampwidth() != 2:  # it is in bytes
				+            raise ValueError(
				+                f"{filename}: expected 2 bytes per sample, "
				+                f"got {f.getsampwidth()}"
				+            )
				+        num_samples = f.getnframes()
				+        samples = f.readframes(num_samples)
				+
				+    # Convert the 16-bit integer samples to float32 and scale by 1/32768.
				+    samples_int16 = np.frombuffer(samples, dtype=np.int16)
				+    samples_float32 = samples_int16.astype(np.float32) / 32768
				+
				+    recognizer.accept_waveform(recognizer.sample_rate, samples_float32)
				+
				+    # Feed 0.5 seconds of zeros after the audio as tail padding.
				+    tail_paddings = np.zeros(int(recognizer.sample_rate * 0.5), dtype=np.float32)
				+    recognizer.accept_waveform(recognizer.sample_rate, tail_paddings)
				+
				+    recognizer.input_finished()
				+
				+    print(recognizer.text)
				+
				+
				+if __name__ == "__main__":
				+    main()
			
--- a/python-api-examples/speech-recognition-from-microphone.py
+++ b/python-api-examples/speech-recognition-from-microphone.py
@@ -0,0 +1,67 @@
 
				+#!/usr/bin/env python3
			
 
				+
			
 
				+# Real-time speech recognition from a microphone with sherpa-ncnn Python API
			
 
				+#
			
 
				+# Please refer to
			
 
				+# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
			
 
				+# to download pre-trained models
			
 
				+
			
 
				+import sys
			
 
				+
			
 
				+try:
			
 
				+    import sounddevice as sd
			
 
				+except ImportError as e:
			
 
				+    print("Please install sounddevice first. You can use")
			
 
				+    print()
			
 
				+    print("  pip install sounddevice")
			
 
				+    print()
			
 
				+    print("to install it")
			
 
				+    sys.exit(-1)
			
 
				+
			
 
				+import sherpa_ncnn
			
 
				+
			
 
				+
			
 
				+def create_recognizer(
				+    model_dir="./sherpa-ncnn-conv-emformer-transducer-2022-12-06",
				+):
				+    """Create a recognizer from the model files found in model_dir.
				+
				+    Args:
				+      model_dir:
				+        Directory that contains tokens.txt and the encoder, decoder and
				+        joiner ``*_jit_trace-pnnx.ncnn.param`` / ``.bin`` files. Pass a
				+        different directory to use another model. See
				+        https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
				+        for download links.
				+
				+    Returns:
				+      A sherpa_ncnn.Recognizer constructed with num_threads=4.
				+    """
				+    recognizer = sherpa_ncnn.Recognizer(
				+        tokens=f"{model_dir}/tokens.txt",
				+        encoder_param=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.param",
				+        encoder_bin=f"{model_dir}/encoder_jit_trace-pnnx.ncnn.bin",
				+        decoder_param=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.param",
				+        decoder_bin=f"{model_dir}/decoder_jit_trace-pnnx.ncnn.bin",
				+        joiner_param=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.param",
				+        joiner_bin=f"{model_dir}/joiner_jit_trace-pnnx.ncnn.bin",
				+        num_threads=4,
				+    )
				+    return recognizer
			
 
				+
			
 
				+
			
 
				+def main():
				+    """Stream microphone audio into the recognizer and print new results.
				+
				+    Audio is read in blocking 20 ms chunks from a mono float32 input
				+    stream opened at the recognizer's sample rate. The recognized text is
				+    printed only when it differs from the previously printed text. The
				+    loop has no exit condition of its own.
				+    """
				+    print("Started! Please speak")
				+    recognizer = create_recognizer()
				+    rate = recognizer.sample_rate
				+    chunk_size = int(0.02 * rate)  # 20ms
				+    previous = ""
				+    stream = sd.InputStream(channels=1, dtype="float32", samplerate=rate)
				+    with stream:
				+        while True:
				+            chunk, _ = stream.read(chunk_size)  # a blocking read
				+            # Flatten to 1-D before handing the samples to the recognizer.
				+            recognizer.accept_waveform(rate, chunk.reshape(-1))
				+            current = recognizer.text
				+            if current == previous:
				+                continue
				+            previous = current
				+            print(current)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
				+    devices = sd.query_devices()
				+    if len(devices) == 0:
				+        # Nothing to record from; stop before trying to open a stream.
				+        sys.exit("No audio devices found")
				+
				+    print(devices)
				+    default_input_device_idx = sd.default.device[0]
				+    if default_input_device_idx < 0:
				+        # A negative index means there is no default input device.
				+        # Indexing devices with it would silently pick the last entry
				+        # and report the wrong device name.
				+        sys.exit("No default input device found")
				+    print(f'Use default device: {devices[default_input_device_idx]["name"]}')
				+
				+    try:
				+        main()
				+    except KeyboardInterrupt:
				+        print("\nCatch Ctrl + C. Exiting")
			
--- a/sherpa-ncnn/python/sherpa_ncnn/recognizer.py
+++ b/sherpa-ncnn/python/sherpa_ncnn/recognizer.py
@@ -10,7 +10,7 @@ def _assert_file_exists(f: str):
 
				 
			
 
				 def _read_tokens(tokens):
			
 
				     sym_table = {}
			
 
				-    with open(tokens) as f:
			
 
				+    with open(tokens, "r", encoding="utf-8") as f:
			
 
				         for line in f:
			
 
				             sym, i = line.split()
			
 
				             sym = sym.replace("▁", " ")