sherpa-ncnn.js 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  /**
   * Releases the heap memory referenced by a config descriptor returned by one
   * of the init*Config helpers in this file.
   *
   * @param {{ptr: number, buffer?: number}} config Descriptor to release. Its
   *     `buffer` is freed only when that property is present; `ptr` is always
   *     freed. A null/undefined descriptor is ignored.
   * @param {Object} Module Object exposing `_free`.
   */
  function freeConfig(config, Module) {
    // Cleanup helpers should tolerate "nothing to clean up".
    if (config === null || config === undefined) {
      return;
    }

    // Not every descriptor carries a separate `buffer` allocation.
    if ('buffer' in config) {
      Module._free(config.buffer);
    }

    Module._free(config.ptr);
  }
  /**
   * Writes a model config into the Module heap.
   *
   * Two allocations are made with `Module._malloc`:
   *   - `buffer`: all seven path strings, UTF-8 encoded, each followed by its
   *     terminator byte, packed back to back;
   *   - `ptr`: a 36-byte record holding seven 4-byte string pointers (into
   *     `buffer`) followed by two i32 values (useVulkanCompute, numThreads).
   *
   * The caller owns both allocations and should free the returned pointers.
   *
   * @param {Object} config Object with string properties encoderParam,
   *     encoderBin, decoderParam, decoderBin, joinerParam, joinerBin, tokens
   *     and numeric properties useVulkanCompute, numThreads.
   * @param {Object} Module Object exposing `lengthBytesUTF8`, `stringToUTF8`,
   *     `setValue` and `_malloc`.
   * @returns {{buffer: number, ptr: number, len: number}} Addresses of the two
   *     allocations and the byte size of the record at `ptr`.
   */
  function initSherpaNcnnModelConfig(config, Module) {
    // Order matters: the i-th name is stored as the i-th string in `buffer`
    // and its address goes into the i-th pointer slot of the record.
    const stringFields = [
      'encoderParam',
      'encoderBin',
      'decoderParam',
      'decoderBin',
      'joinerParam',
      'joinerBin',
      'tokens',
    ];
    const slotSize = 4;
    const recordSize = slotSize * 9;  // 7 string pointers + 2 i32 fields

    // Measure everything before allocating; +1 leaves room for the
    // terminating byte written by stringToUTF8.
    const byteLengths = stringFields.map(
        (name) => Module.lengthBytesUTF8(config[name]) + 1);
    const totalBytes = byteLengths.reduce((sum, len) => sum + len, 0);

    const buffer = Module._malloc(totalBytes);
    const ptr = Module._malloc(recordSize);

    let offset = 0;
    stringFields.forEach((name, index) => {
      const address = buffer + offset;
      Module.stringToUTF8(config[name], address, byteLengths[index]);
      Module.setValue(ptr + slotSize * index, address, 'i8*');
      offset += byteLengths[index];
    });

    // The scalar fields follow directly after the pointer slots.
    const scalarsOffset = slotSize * stringFields.length;
    Module.setValue(ptr + scalarsOffset, config.useVulkanCompute, 'i32');
    Module.setValue(ptr + scalarsOffset + 4, config.numThreads, 'i32');

    return {
      buffer: buffer,
      ptr: ptr,
      len: recordSize,
    };
  }
  /**
   * Writes a decoder config into the Module heap.
   *
   * Allocates `buffer` for the UTF-8 encoded decoding-method string and `ptr`
   * for an 8-byte record: a 4-byte pointer to that string followed by the i32
   * `numActivePaths`. Nothing here frees either allocation; the returned
   * addresses are handed to the caller.
   *
   * @param {{decodingMethod: string, numActivePaths: number}} config
   * @param {Object} Module Object exposing `lengthBytesUTF8`, `stringToUTF8`,
   *     `setValue` and `_malloc`.
   * @returns {{buffer: number, ptr: number, len: number}} Addresses of the two
   *     allocations and the byte size of the record at `ptr`.
   */
  function initSherpaNcnnDecoderConfig(config, Module) {
    const recordSize = 4 * 2;  // one string pointer + one i32

    // +1 leaves room for the terminating byte written by stringToUTF8.
    const methodBytes = Module.lengthBytesUTF8(config.decodingMethod) + 1;

    const buffer = Module._malloc(methodBytes);
    const ptr = Module._malloc(recordSize);

    Module.stringToUTF8(config.decodingMethod, buffer, methodBytes);

    Module.setValue(ptr, buffer, 'i8*');
    Module.setValue(ptr + 4, config.numActivePaths, 'i32');

    return {
      buffer: buffer,
      ptr: ptr,
      len: recordSize,
    };
  }
  /**
   * Writes a feature-extractor config into the Module heap as an 8-byte
   * record: `samplingRate` stored as a float followed by `featureDim` stored
   * as an i32. The record is allocated with `Module._malloc` and is not freed
   * here.
   *
   * @param {{samplingRate: number, featureDim: number}} config
   * @param {Object} Module Object exposing `setValue` and `_malloc`.
   * @returns {{ptr: number, len: number}} Address and byte size of the record.
   */
  function initSherpaNcnnFeatureExtractorConfig(config, Module) {
    const recordSize = 4 * 2;  // one float + one i32
    const ptr = Module._malloc(recordSize);

    Module.setValue(ptr, config.samplingRate, 'float');
    Module.setValue(ptr + 4, config.featureDim, 'i32');

    return {
      ptr: ptr,
      len: recordSize,
    };
  }
  /**
   * Writes a complete recognizer config into the Module heap.
   *
   * The three nested configs are serialized first, then their records are
   * copied back to back (feature extractor, model, decoder) into one new
   * allocation, followed by six 4-byte fields: enableEndpoint (i32), the three
   * endpoint-rule values (float), a zero hotwords-file pointer and a 0.5
   * hotwords score.
   *
   * Nothing is freed here: the returned object carries the combined record and
   * the three nested descriptors so the caller can release each of them.
   *
   * @param {Object} config Object with `featConfig`, `modelConfig`,
   *     `decoderConfig`, `enableEndpoint`, `rule1MinTrailingSilence`,
   *     `rule2MinTrailingSilence` and `rule3MinUtternceLength`. The correctly
   *     spelled `rule3MinUtteranceLength` is used when the legacy (misspelled)
   *     key is undefined.
   * @param {Object} Module Object exposing `_malloc`, `_CopyHeap`, `setValue`
   *     and whatever the nested init helpers need.
   * @returns {{ptr: number, len: number, featConfig: Object,
   *     modelConfig: Object, decoderConfig: Object}}
   */
  function initSherpaNcnnRecognizerConfig(config, Module) {
    const featConfig =
        initSherpaNcnnFeatureExtractorConfig(config.featConfig, Module);
    const modelConfig = initSherpaNcnnModelConfig(config.modelConfig, Module);
    const decoderConfig =
        initSherpaNcnnDecoderConfig(config.decoderConfig, Module);

    // Copied into the combined record in exactly this order.
    const nestedConfigs = [featConfig, modelConfig, decoderConfig];

    // The legacy key is misspelled; it still wins when both are supplied so
    // existing callers see no change.
    const rule3MinUtteranceLength =
        config.rule3MinUtternceLength !== undefined ?
        config.rule3MinUtternceLength :
        config.rule3MinUtteranceLength;

    // Trailing 4-byte fields, in storage order.
    const trailingFields = [
      [config.enableEndpoint, 'i32'],
      [config.rule1MinTrailingSilence, 'float'],
      [config.rule2MinTrailingSilence, 'float'],
      [rule3MinUtteranceLength, 'float'],
      [0, 'i32'],      // hotwords file
      [0.5, 'float'],  // hotwords_score
    ];

    const nestedBytes =
        nestedConfigs.reduce((sum, nested) => sum + nested.len, 0);
    const numBytes = nestedBytes + 4 * trailingFields.length;

    const ptr = Module._malloc(numBytes);

    let offset = 0;
    nestedConfigs.forEach((nested) => {
      Module._CopyHeap(nested.ptr, nested.len, ptr + offset);
      offset += nested.len;
    });

    trailingFields.forEach((field) => {
      Module.setValue(ptr + offset, field[0], field[1]);
      offset += 4;
    });

    return {
      ptr: ptr,
      len: numBytes,
      featConfig: featConfig,
      modelConfig: modelConfig,
      decoderConfig: decoderConfig,
    };
  }
  /**
   * Wraps a native stream handle together with a reusable heap buffer used to
   * pass audio samples to the Module.
   */
  class Stream {
    /**
     * @param handle Native stream handle (as returned by
     *     `Module._CreateStream`).
     * @param {Object} Module Object exposing `_malloc`, `_free`, `HEAPF32`,
     *     `_AcceptWaveform`, `_InputFinished` and `_DestroyStream`.
     */
    constructor(handle, Module) {
      this.handle = handle;
      this.pointer = null;  // heap address of the sample buffer
      this.n = 0;           // capacity of that buffer, in samples
      this.Module = Module;
    }

    /**
     * Destroys the native stream and releases the sample buffer. Calling it
     * again once the handle has been cleared does nothing.
     */
    free() {
      if (this.handle) {
        this.Module._DestroyStream(this.handle);
        this.handle = null;
        this.Module._free(this.pointer);
        this.pointer = null;
        this.n = 0;
      }
    }

    /**
     * @param sampleRate {Number}
     * @param samples {Float32Array} Containing samples in the range [-1, 1]
     */
    acceptWaveform(sampleRate, samples) {
      // Grow the buffer only when the incoming chunk does not fit; smaller
      // chunks reuse the existing allocation.
      if (this.n < samples.length) {
        this.Module._free(this.pointer);
        this.pointer =
            this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
        this.n = samples.length;
      }

      // HEAPF32 is indexed in elements, so the byte address is converted to
      // an element index. The view is looked up after the allocation above.
      this.Module.HEAPF32.set(
          samples, this.pointer / samples.BYTES_PER_ELEMENT);
      this.Module._AcceptWaveform(
          this.handle, sampleRate, this.pointer, samples.length);
    }

    /**
     * Signals that no more audio will be fed to this stream.
     */
    inputFinished() {
      // Must go through this.Module: a bare `_InputFinished` only resolves
      // when the Module exports happen to be globals.
      this.Module._InputFinished(this.handle);
    }
  }
  /**
   * Wraps a native recognizer handle and forwards decoding calls for a given
   * stream to the Module.
   */
  class Recognizer {
    /**
     * Serializes `configObj` into the Module heap, creates the native
     * recognizer from it and then releases the temporary config memory.
     *
     * @param {Object} configObj Recognizer configuration; kept on
     *     `this.config`.
     * @param {Object} Module Object exposing the `_CreateRecognizer`,
     *     `_DestroyRecognizer`, `_CreateStream`, `_IsReady`, `_IsEndpoint`,
     *     `_Decode`, `_Reset`, `_GetResult`, `_DestroyResult`, `getValue` and
     *     `UTF8ToString` functions used below.
     */
    constructor(configObj, Module) {
      this.config = configObj;
      this.Module = Module;

      const config = initSherpaNcnnRecognizerConfig(configObj, Module);
      try {
        this.handle = Module._CreateRecognizer(config.ptr);
      } finally {
        // The serialized config is only needed for the create call; release
        // it even when that call throws so the heap memory is not leaked.
        freeConfig(config.featConfig, Module);
        freeConfig(config.modelConfig, Module);
        freeConfig(config.decoderConfig, Module);
        freeConfig(config, Module);
      }
    }

    /**
     * Destroys the native recognizer. Calling it again once the handle has
     * been cleared does nothing.
     */
    free() {
      if (this.handle) {
        this.Module._DestroyRecognizer(this.handle);
        this.handle = 0;
      }
    }

    /**
     * Creates a new stream bound to this recognizer.
     * @returns {Stream}
     */
    createStream() {
      const streamHandle = this.Module._CreateStream(this.handle);
      return new Stream(streamHandle, this.Module);
    }

    /**
     * @param {Stream} stream
     * @returns {boolean} true when `_IsReady` reports 1 for the stream.
     */
    isReady(stream) {
      return this.Module._IsReady(this.handle, stream.handle) === 1;
    }

    /**
     * @param {Stream} stream
     * @returns {boolean} true when `_IsEndpoint` reports 1 for the stream.
     */
    isEndpoint(stream) {
      return this.Module._IsEndpoint(this.handle, stream.handle) === 1;
    }

    /**
     * Runs `_Decode` for the stream and returns whatever it returns.
     * @param {Stream} stream
     */
    decode(stream) {
      return this.Module._Decode(this.handle, stream.handle);
    }

    /**
     * Runs `_Reset` for the stream.
     * @param {Stream} stream
     */
    reset(stream) {
      this.Module._Reset(this.handle, stream.handle);
    }

    /**
     * Fetches the current result text for the stream.
     * @param {Stream} stream
     * @returns {string}
     */
    getResult(stream) {
      const result = this.Module._GetResult(this.handle, stream.handle);
      // The first field of the result record is a pointer to the text; copy
      // it into a JS string before the record is destroyed.
      const textPtr = this.Module.getValue(result, 'i8*');
      const text = this.Module.UTF8ToString(textPtr);
      this.Module._DestroyResult(result);
      return text;
    }
  }
  /**
   * Creates a Recognizer.
   *
   * @param {Object} Module The Module object handed to the Recognizer.
   * @param {Object} [myConfig] Complete recognizer configuration. When it is
   *     truthy it is used as-is (it is NOT merged with the defaults);
   *     otherwise the built-in defaults below are used.
   * @returns {Recognizer}
   */
  function createRecognizer(Module, myConfig) {
    if (myConfig) {
      return new Recognizer(myConfig, Module);
    }

    const modelConfig = {
      encoderParam: './encoder_jit_trace-pnnx.ncnn.param',
      encoderBin: './encoder_jit_trace-pnnx.ncnn.bin',
      decoderParam: './decoder_jit_trace-pnnx.ncnn.param',
      decoderBin: './decoder_jit_trace-pnnx.ncnn.bin',
      joinerParam: './joiner_jit_trace-pnnx.ncnn.param',
      joinerBin: './joiner_jit_trace-pnnx.ncnn.bin',
      tokens: './tokens.txt',
      useVulkanCompute: 0,
      numThreads: 1,
    };

    const decoderConfig = {
      decodingMethod: 'greedy_search',
      numActivePaths: 4,
    };

    const featConfig = {
      samplingRate: 16000,
      featureDim: 80,
    };

    const defaultConfig = {
      featConfig,
      modelConfig,
      decoderConfig,
      enableEndpoint: 1,
      rule1MinTrailingSilence: 1.2,
      rule2MinTrailingSilence: 2.4,
      // Key spelling ("Utternce") is what the config serializer reads.
      rule3MinUtternceLength: 20,
    };

    return new Recognizer(defaultConfig, Module);
  }
  217. if (typeof process == 'object' && typeof process.versions == 'object' &&
  218. typeof process.versions.node == 'string') {
  219. module.exports = {
  220. createRecognizer,
  221. };
  222. }