Эх сурвалжийг харах

Add Python API examples - Real-time speech recognition with a microphone (#51)

* Add Python API examples

* fix a typo
Fangjun Kuang 2 жил өмнө
parent
commit
b2d47bcf16

+ 9 - 0
python-api-examples/README.md

@@ -0,0 +1,9 @@
+# Introduction
+
+## decode-file.py
+
+This file shows how to recognize speech from a single wave file.
+
+## speech-recognition-from-microphone.py
+
+This file demonstrates how to do real-time speech recognition with a microphone.

+ 57 - 0
python-api-examples/decode-file.py

@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+"""
+This file demonstrates how to use sherpa-ncnn Python API to recognize
+a single file.
+
+Please refer to
+https://k2-fsa.github.io/sherpa/ncnn/index.html
+to install sherpa-ncnn and to download the pre-trained models
+used in this file.
+"""
+
+import wave
+
+import numpy as np
+import sherpa_ncnn
+
+
+def main():
+    recognizer = sherpa_ncnn.Recognizer(
+        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
+        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
+        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
+        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
+        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
+        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
+        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
+        num_threads=4,
+    )
+
+    filename = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/1.wav"
+    with wave.open(filename) as f:
+        assert f.getframerate() == recognizer.sample_rate, (
+            f.getframerate(),
+            recognizer.sample_rate,
+        )
+        assert f.getnchannels() == 1, f.getnchannels()
+        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
+        num_samples = f.getnframes()
+        samples = f.readframes(num_samples)
+        samples_int16 = np.frombuffer(samples, dtype=np.int16)
+        samples_float32 = samples_int16.astype(np.float32)
+
+        samples_float32 = samples_float32 / 32768
+
+    recognizer.accept_waveform(recognizer.sample_rate, samples_float32)
+
+    tail_paddings = np.zeros(int(recognizer.sample_rate * 0.5), dtype=np.float32)
+    recognizer.accept_waveform(recognizer.sample_rate, tail_paddings)
+
+    recognizer.input_finished()
+
+    print(recognizer.text)
+
+
+if __name__ == "__main__":
+    main()

+ 67 - 0
python-api-examples/speech-recognition-from-microphone.py

@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+
+# Real-time speech recognition from a microphone with sherpa-ncnn Python API
+#
+# Please refer to
+# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
+# to download pre-trained models
+
+import sys
+
+try:
+    import sounddevice as sd
+except ImportError as e:
+    print("Please install sounddevice first. You can use")
+    print()
+    print("  pip install sounddevice")
+    print()
+    print("to install it")
+    sys.exit(-1)
+
+import sherpa_ncnn
+
+
+def create_recognizer():
+    # Please replace the model files if needed.
+    # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
+    # for download links.
+    recognizer = sherpa_ncnn.Recognizer(
+        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
+        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
+        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
+        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
+        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
+        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
+        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
+        num_threads=4,
+    )
+    return recognizer
+
+
+def main():
+    print("Started! Please speak")
+    recognizer = create_recognizer()
+    sample_rate = recognizer.sample_rate
+    samples_per_read = int(0.02 * sample_rate)  # 20ms
+    last_result = ""
+    with sd.InputStream(channels=1, dtype="float32", samplerate=sample_rate) as s:
+        while True:
+            samples, _ = s.read(samples_per_read)  # a blocking read
+            samples = samples.reshape(-1)
+            recognizer.accept_waveform(sample_rate, samples)
+            result = recognizer.text
+            if last_result != result:
+                last_result = result
+                print(result)
+
+
+if __name__ == "__main__":
+    devices = sd.query_devices()
+    print(devices)
+    default_input_device_idx = sd.default.device[0]
+    print(f'Use default device: {devices[default_input_device_idx]["name"]}')
+
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\nCatch Ctrl + C. Exiting")

+ 1 - 1
sherpa-ncnn/python/sherpa_ncnn/recognizer.py

@@ -10,7 +10,7 @@ def _assert_file_exists(f: str):
 
 def _read_tokens(tokens):
     sym_table = {}
-    with open(tokens) as f:
+    with open(tokens, "r", encoding="utf-8") as f:
         for line in f:
             sym, i = line.split()
             sym = sym.replace("▁", " ")