decode-file.py

#!/usr/bin/env python3
"""
This file demonstrates how to use the sherpa-ncnn Python API to recognize
a single file.

Please refer to
https://k2-fsa.github.io/sherpa/ncnn/index.html
to install sherpa-ncnn and to download the pre-trained models
used in this file.
"""

import time
import wave

import numpy as np
import sherpa_ncnn


def main():
    # Please refer to https://k2-fsa.github.io/sherpa/ncnn/index.html
    # to download the model files
    recognizer = sherpa_ncnn.Recognizer(
        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
        num_threads=4,
    )
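
    # The pre-trained model is a streaming transducer, so it comes in three
    # parts (encoder, decoder, joiner), each given as an ncnn .param/.bin pair.
    # The relative paths above assume the downloaded model directory lives in
    # the current working directory; num_threads controls how many threads
    # ncnn uses for the neural network computation.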

    filename = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/1.wav"
    with wave.open(filename) as f:
        # Note: If wave_file_sample_rate is different from
        # recognizer.sample_rate, we will do resampling inside sherpa-ncnn
        wave_file_sample_rate = f.getframerate()
        num_channels = f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
        samples_float32 = samples_int16.astype(np.float32)
        samples_float32 = samples_float32 / 32768
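
    # At this point only the first channel is kept and the int16 samples have
    # been scaled by 1/32768, so samples_float32 holds values in [-1, 1), the
    # normalized float32 range that accept_waveform() expects.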

    # simulate streaming
    chunk_size = int(0.1 * wave_file_sample_rate)  # 0.1 seconds

    start = 0
    while start < samples_float32.shape[0]:
        end = start + chunk_size
        end = min(end, samples_float32.shape[0])
        recognizer.accept_waveform(wave_file_sample_rate, samples_float32[start:end])
        start = end

        text = recognizer.text
        if text:
            print(text)

        # simulate streaming by sleeping
        time.sleep(0.1)
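
    # 0.5 seconds of silence is appended as tail padding so that the frames
    # still buffered inside the model can be decoded before the stream is
    # marked as finished.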
    tail_paddings = np.zeros(int(wave_file_sample_rate * 0.5), dtype=np.float32)
    recognizer.accept_waveform(wave_file_sample_rate, tail_paddings)
    recognizer.input_finished()

    text = recognizer.text
    if text:
        print(text)


if __name__ == "__main__":
    main()
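
# Example usage (with the default paths above): run this script from the
# directory that contains the downloaded model folder, e.g.
#   python3 ./decode-file.py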