// decode-file.js (3.5 KB)
  1. // Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2. const fs = require('fs');
  3. const wav = require('wav');
  4. const {Readable} = require('stream');
  5. const sherpa_ncnn = require('sherpa-ncnn');
  6. function createRecognizer() {
  7. let modelConfig = {
  8. encoderParam:
  9. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.param',
  10. encoderBin:
  11. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.bin',
  12. decoderParam:
  13. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.param',
  14. decoderBin:
  15. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.bin',
  16. joinerParam:
  17. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.param',
  18. joinerBin:
  19. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.bin',
  20. tokens:
  21. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt',
  22. useVulkanCompute: 0,
  23. numThreads: 1,
  24. };
  25. let decoderConfig = {
  26. decodingMethod: 'greedy_search',
  27. numActivePaths: 4,
  28. };
  29. let featConfig = {
  30. samplingRate: 16000,
  31. featureDim: 80,
  32. };
  33. let config = {
  34. featConfig: featConfig,
  35. modelConfig: modelConfig,
  36. decoderConfig: decoderConfig,
  37. enableEndpoint: 1,
  38. rule1MinTrailingSilence: 1.2,
  39. rule2MinTrailingSilence: 2.4,
  40. rule3MinUtternceLength: 20,
  41. };
  42. return sherpa_ncnn.createRecognizer(config);
  43. }
  44. const recognizer = createRecognizer();
  45. const stream = recognizer.createStream();
  46. console.log(recognizer.config);
  47. const waveFilename =
  48. './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/test_wavs/0.wav';
  49. const reader = new wav.Reader();
  50. const readable = new Readable().wrap(reader);
  51. const buf = [];
  52. reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  53. if (sampleRate != recognizer.config.featConfig.samplingRate) {
  54. throw new Error(`Only support sampleRate ${
  55. recognizer.config.featConfig.samplingRate}. Given ${sampleRate}`);
  56. }
  57. if (audioFormat != 1) {
  58. throw new Error(`Only support PCM format. Given ${audioFormat}`);
  59. }
  60. if (channels != 1) {
  61. throw new Error(`Only a single channel. Given ${channel}`);
  62. }
  63. if (bitDepth != 16) {
  64. throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  65. }
  66. });
  67. fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  68. .pipe(reader)
  69. .on('finish', function(err) {
  70. // tail padding
  71. const floatSamples =
  72. new Float32Array(recognizer.config.featConfig.samplingRate * 0.5);
  73. buf.push(floatSamples);
  74. const flattened =
  75. Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  76. stream.acceptWaveform(
  77. recognizer.config.featConfig.samplingRate, flattened);
  78. while (recognizer.isReady(stream)) {
  79. recognizer.decode(stream);
  80. }
  81. const r = recognizer.getResult(stream);
  82. console.log('result:', r);
  83. stream.free();
  84. recognizer.free();
  85. });
  86. readable.on('readable', function() {
  87. let chunk;
  88. while ((chunk = readable.read()) != null) {
  89. const int16Samples = new Int16Array(
  90. chunk.buffer, chunk.byteOffset,
  91. chunk.length / Int16Array.BYTES_PER_ELEMENT);
  92. const floatSamples = new Float32Array(int16Samples.length);
  93. for (let i = 0; i < floatSamples.length; i++) {
  94. floatSamples[i] = int16Samples[i] / 32768.0;
  95. }
  96. buf.push(floatSamples);
  97. }
  98. });