decode-file.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  1. #!/usr/bin/env python3
  2. """
  3. This file demonstrates how to use the sherpa-ncnn Python API to recognize
  4. speech in a single wave file.
  5. Please refer to
  6. https://k2-fsa.github.io/sherpa/ncnn/index.html
  7. to install sherpa-ncnn and to download the pre-trained models
  8. used in this file.
  9. """
  10. import time
  11. import wave
  12. import numpy as np
  13. import sherpa_ncnn
  14. def main():
  15. # Please refer to https://k2-fsa.github.io/sherpa/ncnn/index.html
  16. # to download the model files
  17. recognizer = sherpa_ncnn.Recognizer(
  18. tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
  19. encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
  20. encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
  21. decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
  22. decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
  23. joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
  24. joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
  25. num_threads=4,
  26. )
  27. filename = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/1.wav"
  28. with wave.open(filename) as f:
  29. # Note: If wave_file_sample_rate is different from
  30. # recognizer.sample_rate, we will do resampling inside sherpa-ncnn
  31. wave_file_sample_rate = f.getframerate()
  32. assert f.getnchannels() == 1, f.getnchannels()
  33. assert f.getsampwidth() == 2, f.getsampwidth() # it is in bytes
  34. num_samples = f.getnframes()
  35. samples = f.readframes(num_samples)
  36. samples_int16 = np.frombuffer(samples, dtype=np.int16)
  37. samples_float32 = samples_int16.astype(np.float32)
  38. samples_float32 = samples_float32 / 32768
  39. # simulate streaming
  40. chunk_size = int(0.1 * wave_file_sample_rate) # 0.1 seconds
  41. start = 0
  42. while start < samples_float32.shape[0]:
  43. end = start + chunk_size
  44. end = min(end, samples_float32.shape[0])
  45. recognizer.accept_waveform(wave_file_sample_rate, samples_float32[start:end])
  46. start = end
  47. text = recognizer.text
  48. if text:
  49. print(text)
  50. # simulate streaming by sleeping
  51. time.sleep(0.1)
  52. tail_paddings = np.zeros(int(wave_file_sample_rate * 0.5), dtype=np.float32)
  53. recognizer.accept_waveform(wave_file_sample_rate, tail_paddings)
  54. recognizer.input_finished()
  55. text = recognizer.text
  56. if text:
  57. print(text)
  58. if __name__ == "__main__":
  59. main()