sherpa-ncnn.js 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  /**
   * Release the heap allocations referenced by a config descriptor.
   *
   * @param config {Object} Descriptor carrying a `ptr` field and, optionally, a
   *   `buffer` field. `buffer` is freed first (only when the key is present),
   *   then `ptr`.
   */
  function freeConfig(config) {
    const ownsBuffer = 'buffer' in config;
    if (ownsBuffer) {
      _free(config.buffer);
    }

    _free(config.ptr);
  }
  /**
   * Build the native model-config struct on the heap.
   *
   * The seven path strings are written back to back into one `_malloc`'d
   * buffer, and a second 36-byte allocation receives seven 4-byte pointers
   * into that buffer followed by two 32-bit integers.
   *
   * The caller owns both allocations and must free them (see `freeConfig`).
   *
   * @param config {Object} Must provide `encoderParam`, `encoderBin`,
   *   `decoderParam`, `decoderBin`, `joinerParam`, `joinerBin`, `tokens`
   *   (strings), plus `useVulkanCompute` and `numThreads` (written as i32).
   * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
   *   string storage, `ptr` the struct address, `len` the struct size (36).
   */
  function initSherpaNcnnModelConfig(config) {
    // Listed in struct order: slot i of the struct points at string i.
    const pathKeys = [
      'encoderParam',
      'encoderBin',
      'decoderParam',
      'decoderBin',
      'joinerParam',
      'joinerBin',
      'tokens',
    ];

    // +1 leaves room for the terminator that stringToUTF8 appends.
    const sizes = pathKeys.map((key) => lengthBytesUTF8(config[key]) + 1);
    const totalBytes = sizes.reduce((sum, size) => sum + size, 0);

    const buffer = _malloc(totalBytes);
    const ptr = _malloc(4 * 9);

    let cursor = 0;
    pathKeys.forEach((key, slot) => {
      const dst = buffer + cursor;
      stringToUTF8(config[key], dst, sizes[slot]);
      Module.setValue(ptr + 4 * slot, dst, 'i8*');
      cursor += sizes[slot];
    });

    Module.setValue(ptr + 28, config.useVulkanCompute, 'i32');
    Module.setValue(ptr + 32, config.numThreads, 'i32');

    return {buffer: buffer, ptr: ptr, len: 36};
  }
  /**
   * Build the native decoder-config struct on the heap.
   *
   * Allocates one buffer for the decoding-method string and an 8-byte struct
   * holding a pointer to that string followed by `numActivePaths` as i32.
   * The caller must free both allocations (see `freeConfig`).
   *
   * @param config {Object} Provides `decodingMethod` (string) and
   *   `numActivePaths` (written as i32).
   * @returns {{buffer: number, ptr: number, len: number}} String storage,
   *   struct address, and struct size (8).
   */
  function initSherpaNcnnDecoderConfig(config) {
    const method = config.decodingMethod;
    // +1 leaves room for the terminator that stringToUTF8 appends.
    const methodBytes = lengthBytesUTF8(method) + 1;

    const buffer = _malloc(methodBytes);
    const ptr = _malloc(4 * 2);

    stringToUTF8(method, buffer, methodBytes);
    Module.setValue(ptr, buffer, 'i8*');
    Module.setValue(ptr + 4, config.numActivePaths, 'i32');

    return {buffer, ptr, len: 8};
  }
  /**
   * Build the native feature-extractor config struct on the heap.
   *
   * The 8-byte struct holds `samplingRate` as a float at offset 0 and
   * `featureDim` as an i32 at offset 4. The caller must free `ptr`.
   *
   * @param config {Object} Provides `samplingRate` and `featureDim`.
   * @returns {{ptr: number, len: number}} Struct address and size (8).
   */
  function initSherpaNcnnFeatureExtractorConfig(config) {
    const structBytes = 4 * 2;
    const ptr = _malloc(structBytes);

    Module.setValue(ptr, config.samplingRate, 'float');
    Module.setValue(ptr + 4, config.featureDim, 'i32');

    return {ptr, len: structBytes};
  }
  /**
   * Build the native recognizer-config struct on the heap.
   *
   * The three nested configs are built first and their struct bytes copied,
   * in order (feature extractor, model, decoder), into one allocation. Six
   * 4-byte scalars follow: the endpoint flag, the three endpoint-rule values,
   * and two hotwords fields that are always written as 0 and 0.5.
   *
   * The nested descriptors are returned as well so the caller can free every
   * allocation (see `freeConfig`).
   *
   * @param config {Object} Provides `featConfig`, `modelConfig`,
   *   `decoderConfig`, `enableEndpoint`, `rule1MinTrailingSilence`,
   *   `rule2MinTrailingSilence` and `rule3MinUtternceLength` (the key is
   *   spelled this way throughout the file).
   * @returns {{ptr: number, len: number, featConfig: Object,
   *   modelConfig: Object, decoderConfig: Object}}
   */
  function initSherpaNcnnRecognizerConfig(config) {
    const featConfig = initSherpaNcnnFeatureExtractorConfig(config.featConfig);
    const modelConfig = initSherpaNcnnModelConfig(config.modelConfig);
    const decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig);

    // Nested structs + 4 endpoint scalars + 2 hotwords scalars.
    const numBytes =
        featConfig.len + modelConfig.len + decoderConfig.len + 4 * 4 + 4 * 2;
    const ptr = _malloc(numBytes);

    let cursor = 0;
    for (const part of [featConfig, modelConfig, decoderConfig]) {
      _CopyHeap(part.ptr, part.len, ptr + cursor);
      cursor += part.len;
    }

    const scalars = [
      [config.enableEndpoint, 'i32'],
      [config.rule1MinTrailingSilence, 'float'],
      [config.rule2MinTrailingSilence, 'float'],
      [config.rule3MinUtternceLength, 'float'],
      [0, 'i32'],      // hotwords file
      [0.5, 'float'],  // hotwords_score
    ];
    for (const [value, type] of scalars) {
      Module.setValue(ptr + cursor, value, type);
      cursor += 4;
    }

    return {
      ptr: ptr,
      len: numBytes,
      featConfig: featConfig,
      modelConfig: modelConfig,
      decoderConfig: decoderConfig,
    };
  }
  /**
   * Wrapper around a native stream handle.
   *
   * Keeps one reusable heap buffer (`pointer`, sized for `n` samples) that
   * `acceptWaveform` uses to hand samples to the native side.
   */
  class Stream {
    /**
     * @param handle Native stream handle, as returned by `_CreateStream`.
     */
    constructor(handle) {
      this.handle = handle;
      this.pointer = null;  // heap address of the sample staging buffer
      this.n = 0;           // capacity of that buffer, in samples
    }

    /**
     * Destroy the native stream and release the staging buffer.
     * Does nothing when the handle is already cleared.
     */
    free() {
      if (!this.handle) {
        return;
      }

      _DestroyStream(this.handle);
      this.handle = null;

      _free(this.pointer);
      this.pointer = null;
      this.n = 0;
    }

    /**
     * @param sampleRate {Number}
     * @param samples {Float32Array} Containing samples in the range [-1, 1]
     */
    acceptWaveform(sampleRate, samples) {
      const count = samples.length;
      const elemBytes = samples.BYTES_PER_ELEMENT;

      // Grow the staging buffer only when the current one is too small.
      if (count > this.n) {
        _free(this.pointer);
        this.pointer = _malloc(count * elemBytes);
        this.n = count;
      }

      // HEAPF32 is indexed in elements, so convert the byte address.
      Module.HEAPF32.set(samples, this.pointer / elemBytes);
      _AcceptWaveform(this.handle, sampleRate, this.pointer, count);
    }

    /** Forward to `_InputFinished` for this stream's handle. */
    inputFinished() {
      _InputFinished(this.handle);
    }
  }
  /**
   * Wrapper around a native recognizer handle.
   */
  class Recognizer {
    /**
     * @param configObj {Object} Recognizer configuration in the shape expected
     *   by `initSherpaNcnnRecognizerConfig`. Ignored when `borrowedHandle` is
     *   truthy.
     * @param borrowedHandle Optional existing native recognizer handle. When
     *   truthy it is wrapped as-is and no new recognizer is created. Note that
     *   `free()` passes it to `_DestroyRecognizer` like any other handle.
     */
    constructor(configObj, borrowedHandle) {
      if (borrowedHandle) {
        this.handle = borrowedHandle;
        return;
      }

      let config = initSherpaNcnnRecognizerConfig(configObj);
      let handle = _CreateRecognizer(config.ptr);

      // The config structs are released immediately after creation; only the
      // returned handle is kept.
      freeConfig(config.featConfig);
      freeConfig(config.modelConfig);
      freeConfig(config.decoderConfig);
      freeConfig(config);

      this.handle = handle;
    }

    /**
     * Destroy the native recognizer.
     *
     * Idempotent: once the handle has been cleared, further calls do nothing
     * (same guard as `Stream.free()`), so a cleared handle is never handed to
     * `_DestroyRecognizer`.
     */
    free() {
      if (!this.handle) {
        return;
      }
      _DestroyRecognizer(this.handle);
      this.handle = 0;
    }

    /**
     * Create a stream bound to this recognizer.
     * @returns {Stream}
     */
    createStream() {
      let handle = _CreateStream(this.handle);
      return new Stream(handle);
    }

    /**
     * @param stream {Stream}
     * @returns {boolean} True when `_IsReady` returns 1 for the stream.
     */
    isReady(stream) {
      return _IsReady(this.handle, stream.handle) == 1;
    }

    /**
     * @param stream {Stream}
     * @returns {boolean} True when `_IsEndpoint` returns 1 for the stream.
     */
    isEndpoint(stream) {
      return _IsEndpoint(this.handle, stream.handle) == 1;
    }

    /**
     * Run `_Decode` on the stream.
     * @param stream {Stream}
     * @returns Whatever `_Decode` returns.
     */
    decode(stream) {
      return _Decode(this.handle, stream.handle);
    }

    /**
     * Run `_Reset` on the stream.
     * @param stream {Stream}
     */
    reset(stream) {
      _Reset(this.handle, stream.handle);
    }

    /**
     * Fetch the current recognition text for the stream.
     *
     * The native result's first field is read as a string pointer and decoded
     * as UTF-8; the native result is destroyed before returning.
     *
     * @param stream {Stream}
     * @returns {string}
     */
    getResult(stream) {
      let r = _GetResult(this.handle, stream.handle);
      let textPtr = getValue(r, 'i8*');
      let text = UTF8ToString(textPtr);
      _DestroyResult(r);
      return text;
    }
  }
  /**
   * Create a `Recognizer` with the built-in default configuration: model files
   * addressed by relative paths, greedy-search decoding, 16 kHz / 80-dim
   * features, and endpointing enabled.
   *
   * @returns {Recognizer}
   */
  function createRecognizer() {
    return new Recognizer({
      featConfig: {
        samplingRate: 16000,
        featureDim: 80,
      },
      modelConfig: {
        encoderParam: './encoder_jit_trace-pnnx.ncnn.param',
        encoderBin: './encoder_jit_trace-pnnx.ncnn.bin',
        decoderParam: './decoder_jit_trace-pnnx.ncnn.param',
        decoderBin: './decoder_jit_trace-pnnx.ncnn.bin',
        joinerParam: './joiner_jit_trace-pnnx.ncnn.param',
        joinerBin: './joiner_jit_trace-pnnx.ncnn.bin',
        tokens: './tokens.txt',
        useVulkanCompute: 0,
        numThreads: 1,
      },
      decoderConfig: {
        decodingMethod: 'greedy_search',
        numActivePaths: 4,
      },
      enableEndpoint: 1,
      rule1MinTrailingSilence: 1.2,
      rule2MinTrailingSilence: 2.4,
      rule3MinUtternceLength: 20,
    });
  }
  211. }