#!/usr/bin/env python3
"""
This file demonstrates how to use sherpa-ncnn Python API to recognize
a single file.

Please refer to
https://k2-fsa.github.io/sherpa/ncnn/index.html
to install sherpa-ncnn and to download the pre-trained models
used in this file.
"""
import time
import wave

import numpy as np

import sherpa_ncnn
def main(filename="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/1.wav"):
    """Decode a single wave file with a streaming sherpa-ncnn recognizer.

    The file is fed to the recognizer in 0.1-second chunks, with a
    0.1-second sleep between chunks to simulate real-time streaming.
    Partial recognition results are printed as they become available.

    Args:
      filename:
        Path to a 16-bit PCM wave file to decode.

    Raises:
      ValueError: If the wave file is not 16-bit (2 bytes per sample).
    """
    # Please refer to https://k2-fsa.github.io/sherpa/ncnn/index.html
    # to download the model files
    recognizer = sherpa_ncnn.Recognizer(
        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
        num_threads=4,
    )

    with wave.open(filename) as f:
        # Note: If wave_file_sample_rate is different from
        # recognizer.sample_rate, we will do resampling inside sherpa-ncnn
        wave_file_sample_rate = f.getframerate()
        num_channels = f.getnchannels()

        # `assert` is stripped under `python -O`, so validate explicitly.
        sample_width = f.getsampwidth()  # in bytes
        if sample_width != 2:
            raise ValueError(
                f"Expected 16-bit samples (2 bytes), got {sample_width} bytes"
            )

        samples = f.readframes(f.getnframes())

    # Decode interleaved int16 PCM; keep only the first channel for
    # multi-channel files, then normalize to [-1, 1) float32, which is
    # the format accept_waveform() consumes.
    samples_int16 = np.frombuffer(samples, dtype=np.int16)
    samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
    samples_float32 = samples_int16.astype(np.float32) / 32768

    # simulate streaming
    chunk_size = int(0.1 * wave_file_sample_rate)  # 0.1 seconds of audio
    start = 0
    while start < samples_float32.shape[0]:
        end = min(start + chunk_size, samples_float32.shape[0])
        recognizer.accept_waveform(wave_file_sample_rate, samples_float32[start:end])
        start = end

        text = recognizer.text
        if text:
            print(text)

        # simulate streaming by sleeping
        time.sleep(0.1)

    # Feed trailing silence so the recognizer flushes its internal
    # buffers and decodes the last frames, then signal end of input.
    tail_paddings = np.zeros(int(wave_file_sample_rate * 0.5), dtype=np.float32)
    recognizer.accept_waveform(wave_file_sample_rate, tail_paddings)
    recognizer.input_finished()

    text = recognizer.text
    if text:
        print(text)
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()