Procházet zdrojové kódy

Add examples for NodeJS (#280)

Fangjun Kuang před 1 rokem
rodič
revize
693ea4229d

+ 95 - 0
.github/workflows/nodejs.yaml

@@ -0,0 +1,95 @@
+name: nodejs
+
+on:
+  push:
+    branches:
+      - master
+    paths:
+      - '.github/workflows/nodejs.yaml'
+      - 'CMakeLists.txt'
+      - 'cmake/**'
+      - 'nodejs-examples/**'
+      - 'sherpa-ncnn/csrc/*'
+  pull_request:
+    branches:
+      - master
+    paths:
+      - '.github/workflows/nodejs.yaml'
+      - 'CMakeLists.txt'
+      - 'cmake/**'
+      - 'nodejs-examples/**'
+      - 'sherpa-ncnn/csrc/*'
+
+concurrency:
+  group: nodejs-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+jobs:
+  nodejs:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        python-version: ["3.8"]
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - uses: actions/setup-node@v3
+        with:
+          node-version: 13
+
+      - name: Display node version
+        shell: bash
+        run: |
+          node --version
+          npm --version
+          cd nodejs-examples
+
+          npm install npm@6.14.4 -g
+          npm install npm@6.14.4
+          npm --version
+
+      - name: Install npm packages
+        shell: bash
+        run: |
+          cd nodejs-examples
+          npm install ffi-napi ref-struct-napi wav
+          npm list
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ matrix.os }}-shared
+
+      - name: Download model
+        shell: bash
+        run: |
+          cd nodejs-examples
+          GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13
+          cd sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13
+          git lfs pull --include "*.bin"
+          ls -lh
+
+      - name: Test
+        shell: bash
+        run: |
+          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+          cmake --version
+
+          cd nodejs-examples
+          ls -lh
+
+          ./run.sh

+ 15 - 13
CMakeLists.txt

@@ -92,19 +92,21 @@ endif()
 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-include(CheckIncludeFileCXX)
-check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
-if(SHERPA_NCNN_HAS_ALSA)
-  add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
-elseif(UNIX AND NOT APPLE)
-  message(WARNING "\
-Could not find alsa/asoundlib.h !
-We won't build sherpa-ncnn-alsa
-To fix that, please do:
-  (1) sudo apt-get install alsa-utils libasound2-dev
-  (2) rm -rf build
-  (3) re-try
-")
+# Probe for the ALSA header only when binaries are built on a non-Apple UNIX
+# system; the warning below is therefore only ever shown in that configuration.
+if(SHERPA_NCNN_ENABLE_BINARY AND UNIX AND NOT APPLE)
+  include(CheckIncludeFileCXX)
+  check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
+  if(SHERPA_NCNN_HAS_ALSA)
+    add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
+  else()
+    # The enclosing if() already guarantees UNIX AND NOT APPLE, so a plain
+    # else() is enough. The message text starts at column 0 on purpose:
+    # leading spaces inside the quotes would be printed verbatim.
+    message(WARNING "\
+Could not find alsa/asoundlib.h !
+We won't build sherpa-ncnn-alsa
+To fix that, please do:
+  (1) sudo apt-get install alsa-utils libasound2-dev
+  (2) rm -rf build
+  (3) re-try
+")
+  endif()
 endif()
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)

+ 4 - 0
nodejs-examples/.gitignore

@@ -0,0 +1,4 @@
+install
+node_modules
+package.json
+package-lock.json

+ 181 - 0
nodejs-examples/index.js

@@ -0,0 +1,181 @@
+// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+//
+// Please use
+//
+// npm install ffi-napi ref-struct-napi
+//
+// before you use this file
+//
+//
+// Please use node 13. node 16, 18, 20, and 21 are known not to work.
+// See also
+// https://github.com/node-ffi-napi/node-ffi-napi/issues/244
+// and
+// https://github.com/node-ffi-napi/node-ffi-napi/issues/97
+'use strict'
+
+const debug = require('debug')('sherpa-ncnn');
+const os = require('os');
+const path = require('path');
+const ffi = require('ffi-napi');
+const ref = require('ref-napi');
+const fs = require('fs');
+
+const StructType = require('ref-struct-napi');
+const cstring = ref.types.CString;
+const int32_t = ref.types.int32;
+const float = ref.types.float;
+const floatPtr = ref.refType(float);
+
+const RecognizerPtr = ref.refType(ref.types.void);
+const StreamPtr = ref.refType(ref.types.void);
+const SherpaNcnnModelConfig = StructType({
+  'encoderParam': cstring,
+  'encoderBin': cstring,
+  'decoderParam': cstring,
+  'decoderBin': cstring,
+  'joinerParam': cstring,
+  'joinerBin': cstring,
+  'tokens': cstring,
+  'useVulkanCompute': int32_t,
+  'numThreads': int32_t,
+});
+
+const SherpaNcnnDecoderConfig = StructType({
+  'decodingMethod': cstring,
+  'numActivePaths': int32_t,
+});
+
+const SherpaNcnnFeatureExtractorConfig = StructType({
+  'sampleRate': float,
+  'featureDim': int32_t,
+});
+
+const SherpaNcnnRecognizerConfig = StructType({
+  'featConfig': SherpaNcnnFeatureExtractorConfig,
+  'modelConfig': SherpaNcnnModelConfig,
+  'decoderConfig': SherpaNcnnDecoderConfig,
+  'enableEndpoint': int32_t,
+  'rule1MinTrailingSilence': float,
+  'rule2MinTrailingSilence': float,
+  'rule3MinUtteranceLength': float,
+  'hotwordsFile': cstring,
+  'hotwordsScore': cstring,
+});
+
+const SherpaNcnnResult = StructType({
+  'text': cstring,
+  'tokens': cstring,
+  'timestamps': floatPtr,
+  'count': int32_t,
+});
+
+
+const ResultPtr = ref.refType(SherpaNcnnResult);
+const RecognizerConfigPtr = ref.refType(SherpaNcnnRecognizerConfig)
+
+let soname;
+if (os.platform() == 'win32') {
+  soname = path.join(__dirname, 'install', 'lib', 'sherpa-ncnn-c-api.dll');
+} else if (os.platform() == 'darwin') {
+  soname = path.join(__dirname, 'install', 'lib', 'libsherpa-ncnn-c-api.dylib');
+} else if (os.platform() == 'linux') {
+  soname = path.join(__dirname, 'install', 'lib', 'libsherpa-ncnn-c-api.so');
+} else {
+  throw new Error(`Unsupported platform ${os.platform()}`);
+}
+if (!fs.existsSync(soname)) {
+  throw new Error(`Cannot find file ${soname}. Please make sure you have run
+      ./build.sh`);
+}
+
+debug('soname ', soname)
+
+const libsherpa_ncnn = ffi.Library(soname, {
+  'CreateRecognizer': [RecognizerPtr, [RecognizerConfigPtr]],
+  'DestroyRecognizer': ['void', [RecognizerPtr]],
+  'CreateStream': [StreamPtr, [RecognizerPtr]],
+  'DestroyStream': ['void', [StreamPtr]],
+  'AcceptWaveform': ['void', [StreamPtr, float, floatPtr, int32_t]],
+  'IsReady': [int32_t, [RecognizerPtr, StreamPtr]],
+  'Decode': ['void', [RecognizerPtr, StreamPtr]],
+  'GetResult': [ResultPtr, [RecognizerPtr, StreamPtr]],
+  'DestroyResult': ['void', [ResultPtr]],
+  'Reset': ['void', [RecognizerPtr, StreamPtr]],
+  'InputFinished': ['void', [StreamPtr]],
+  'IsEndpoint': [int32_t, [RecognizerPtr, StreamPtr]],
+});
+
+class Recognizer {
+  /**
+   * @param {SherpaNcnnRecognizerConfig} config Configuration for the recognizer
+   *
+   * The user has to invoke this.free() at the end to avoid memory leak.
+   */
+  constructor(config) {
+    this.recognizer_handle = libsherpa_ncnn.CreateRecognizer(config.ref());
+    this.stream_handle = libsherpa_ncnn.CreateStream(this.recognizer_handle);
+  }
+
+  free() {
+    if (this.stream_handle) {
+      libsherpa_ncnn.DestroyStream(this.stream_handle);
+      this.stream_handle = null;
+    }
+
+    libsherpa_ncnn.DestroyRecognizer(this.recognizer_handle);
+    this.handle = null;
+  }
+
+  /**
+   * @param {bool} true to create a new stream
+   */
+  reset(recreate) {
+    if (recreate) {
+      libsherpa_ncnn.DestroyStream(this.stream_handle);
+      this.stream_handle = libsherpa_ncnn.CreateStream(this.recognizer_handle);
+      return;
+    }
+    libsherpa_ncnn.Reset(this.recognizer_handle, this.stream_handle)
+  }
+  /**
+   * @param {float} Sample rate of the input data
+   * @param {float[]} A 1-d float array containing audio samples. It should be
+   *                  in the range [-1, 1].
+   */
+  acceptWaveform(sampleRate, samples) {
+    libsherpa_ncnn.AcceptWaveform(
+        this.stream_handle, sampleRate, samples, samples.length);
+  }
+
+  isReady() {
+    return libsherpa_ncnn.IsReady(this.recognizer_handle, this.stream_handle);
+  }
+
+  decode() {
+    libsherpa_ncnn.Decode(this.recognizer_handle, this.stream_handle);
+  }
+
+  getResult() {
+    const h =
+        libsherpa_ncnn.GetResult(this.recognizer_handle, this.stream_handle);
+    const text = Buffer.from(h.deref().text, 'utf-8').toString();
+    libsherpa_ncnn.DestroyResult(h);
+    return text;
+  }
+};
+
+// alias
+
+const ModelConfig = SherpaNcnnModelConfig;
+const DecoderConfig = SherpaNcnnDecoderConfig;
+const FeatureConfig = SherpaNcnnFeatureExtractorConfig;
+const RecognizerConfig = SherpaNcnnRecognizerConfig;
+
+module.exports = {
+  FeatureConfig,
+  ModelConfig,
+  DecoderConfig,
+  Recognizer,
+  RecognizerConfig,
+};

+ 29 - 0
nodejs-examples/package.json

@@ -0,0 +1,29 @@
+{
+  "name": "sherpa-ncnn",
+  "version": "2.1.4",
+  "description": "real-time speech recognition with Next-gen Kaldi",
+  "main": "index.js",
+  "dependencies": {
+    "ffi-napi": "^4.0.3",
+    "ref-struct-napi": "^1.1.1",
+    "wav": "^1.0.2"
+  },
+  "devDependencies": {},
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+ssh://git@github.com/k2-fsa/sherpa-ncnn.git"
+  },
+  "keywords": [
+    "speech-to-text",
+    "ASR"
+  ],
+  "author": "The Next-gen Kaldi team",
+  "license": "Apache-2.0",
+  "bugs": {
+    "url": "https://github.com/k2-fsa/sherpa-ncnn/issues"
+  },
+  "homepage": "https://github.com/k2-fsa/sherpa-ncnn#readme"
+}

+ 34 - 0
nodejs-examples/run.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+# Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+#
+# Installs the required npm packages, builds the sherpa-ncnn C API shared
+# library (unless ./install already exists) and runs test.js.
+
+# Abort on the first failing command so that a failed cmake/make is not
+# followed by creating the symlink and running node.
+set -e
+
+# Install each npm dependency only if `npm list` does not show it yet.
+npm list | grep ffi-napi >/dev/null || npm install ffi-napi
+npm list | grep ref-struct-napi >/dev/null || npm install ref-struct-napi
+npm list | grep wav >/dev/null || npm install wav
+
+if [ ! -e ./install ]; then
+  cd ..
+  mkdir -p build
+  cd build
+  cmake -DBUILD_SHARED_LIBS=ON \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_NCNN_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_NCNN_ENABLE_BINARY=OFF \
+    -DSHERPA_NCNN_ENABLE_C_API=ON \
+    -DSHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE=OFF \
+    -DSHERPA_NCNN_ENABLE_PYTHON=OFF \
+    ..
+  make -j3
+  make install
+  cd ../nodejs-examples
+  # Quote the path so that a checkout directory containing spaces works.
+  ln -s "$PWD/../build/install" .
+fi
+
+if [ ! -d ./sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 ]; then
+  echo "Please refer to"
+  echo "https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/zipformer-transucer-models.html#csukuangfj-sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13-bilingual-chinese-english"
+  echo "to download the models"
+  exit 0
+fi
+
+node ./test.js

+ 98 - 0
nodejs-examples/test.js

@@ -0,0 +1,98 @@
+// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const fs = require('fs');
+const {Readable} = require('stream');
+const wav = require('wav');
+
+sherpa_ncnn = require('./index.js')
+
+const featConfig = new sherpa_ncnn.FeatureConfig();
+featConfig.sampleRate = 16000;
+featConfig.featureDim = 80;
+
+const decoderConfig = new sherpa_ncnn.DecoderConfig();
+decoderConfig.decodingMethod = 'greedy_search';
+decoderConfig.numActivePaths = 4;
+
+const modelConfig = new sherpa_ncnn.ModelConfig();
+modelConfig.encoderParam =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.param';
+modelConfig.encoderBin =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.bin';
+
+modelConfig.decoderParam =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.param';
+modelConfig.decoderBin =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.bin';
+
+modelConfig.joinerParam =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.param';
+modelConfig.joinerBin =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.bin';
+
+modelConfig.tokens =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt';
+modelConfig.useVulkanCompute = 0;
+modelConfig.numThreads = 1;
+
+const recognizerConfig = new sherpa_ncnn.RecognizerConfig();
+recognizerConfig.featConfig = featConfig;
+recognizerConfig.modelConfig = modelConfig;
+recognizerConfig.decoderConfig = decoderConfig;
+
+const recognizer = new sherpa_ncnn.Recognizer(recognizerConfig);
+
+const waveFilename =
+    './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/test_wavs/2.wav'
+
+const reader = new wav.Reader();
+const readable = new Readable().wrap(reader);
+
+function decode(samples) {
+  recognizer.acceptWaveform(recognizerConfig.featConfig.sampleRate, samples);
+
+  while (recognizer.isReady()) {
+    recognizer.decode();
+  }
+  const text = recognizer.getResult();
+  console.log(text);
+}
+
+reader.on('format', ({audioFormat, sampleRate, channels, bitDepth}) => {
+  if (audioFormat != 1) {
+    throw new Error(`Only support PCM format. Given ${audioFormat}`);
+  }
+  if (channels != 1) {
+    throw new Error(`Only a single channel. Given ${channel}`);
+  }
+
+  if (bitDepth != 16) {
+    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
+  }
+});
+
+fs.createReadStream(waveFilename, {'highWaterMark': 4096})
+    .pipe(reader)
+    .on('finish', function(err) {
+      // tail padding
+      const floatSamples =
+          new Float32Array(recognizerConfig.featConfig.sampleRate * 0.5);
+      decode(floatSamples);
+      recognizer.free()
+    });
+
+
+readable.on('readable', function() {
+  let chunk;
+  while ((chunk = readable.read()) != null) {
+    const int16Samples = new Int16Array(
+        chunk.buffer, chunk.byteOffset,
+        chunk.length / Int16Array.BYTES_PER_ELEMENT);
+
+    let floatSamples = new Float32Array(int16Samples.length);
+    for (let i = 0; i < floatSamples.length; i++) {
+      floatSamples[i] = int16Samples[i] / 32768.0;
+    }
+    decode(floatSamples);
+  }
+});