decode-file.py

#!/usr/bin/env python3

"""
This file demonstrates how to use sherpa-ncnn Python API to recognize
a single file.

Please refer to
https://k2-fsa.github.io/sherpa/ncnn/index.html
to install sherpa-ncnn and to download the pre-trained models
used in this file.
"""

import time
import wave

import numpy as np
import sherpa_ncnn
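
# The script below builds a streaming recognizer from the pre-trained
# conv-emformer transducer model, feeds it a test wave file in small chunks
# to simulate real-time input, and prints partial results as they appear.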


def main():
    # Please refer to https://k2-fsa.github.io/sherpa/ncnn/index.html
    # to download the model files
    recognizer = sherpa_ncnn.Recognizer(
        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
        num_threads=4,
        hotwords_file="",
        hotwords_score=1.5,
    )
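
    # Read the whole test wave file and convert its samples to what the
    # recognizer expects: a single channel of float32 values in [-1, 1).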
    filename = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/1.wav"
    with wave.open(filename) as f:
        # Note: If wave_file_sample_rate is different from
        # recognizer.sample_rate, we will do resampling inside sherpa-ncnn
        wave_file_sample_rate = f.getframerate()
        num_channels = f.getnchannels()
        assert f.getsampwidth() == 2, f.getsampwidth()  # it is in bytes
        num_samples = f.getnframes()
        samples = f.readframes(num_samples)
        samples_int16 = np.frombuffer(samples, dtype=np.int16)
        samples_int16 = samples_int16.reshape(-1, num_channels)[:, 0]
        samples_float32 = samples_int16.astype(np.float32)
        samples_float32 = samples_float32 / 32768
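
    # Instead of sending the whole file at once, feed it in 0.1 second chunks
    # and sleep after each chunk to mimic audio arriving from a microphone.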
    # simulate streaming
    chunk_size = int(0.1 * wave_file_sample_rate)  # 0.1 seconds

    start = 0
    while start < samples_float32.shape[0]:
        end = start + chunk_size
        end = min(end, samples_float32.shape[0])
        recognizer.accept_waveform(wave_file_sample_rate, samples_float32[start:end])
        start = end
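
        # recognizer.text holds everything recognized so far, so printing it
        # inside the loop shows partial results while audio is still arriving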
        text = recognizer.text
        if text:
            print(text)

        # simulate streaming by sleeping
        time.sleep(0.1)
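
    # Feed 0.5 seconds of silence as tail padding so the model has enough
    # right context to decode the last real frames, then signal that no
    # more audio will arrive.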
    tail_paddings = np.zeros(int(wave_file_sample_rate * 0.5), dtype=np.float32)
    recognizer.accept_waveform(wave_file_sample_rate, tail_paddings)
    recognizer.input_finished()

    text = recognizer.text
    if text:
        print(text)


if __name__ == "__main__":
    main()