decode-file-c-api.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. /**
  2. * Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  3. *
  4. * See LICENSE for clarification regarding multiple authors
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. #include <stdio.h>
  19. #include <stdlib.h>
  20. #include <string.h>
  21. #include "sherpa-ncnn/c-api/c-api.h"
  22. const char *kUsage =
  23. "\n"
  24. "Usage:\n"
  25. " ./bin/decode-file-c-api \\\n"
  26. " /path/to/tokens.txt \\\n"
  27. " /path/to/encoder.ncnn.param \\\n"
  28. " /path/to/encoder.ncnn.bin \\\n"
  29. " /path/to/decoder.ncnn.param \\\n"
  30. " /path/to/decoder.ncnn.bin \\\n"
  31. " /path/to/joiner.ncnn.param \\\n"
  32. " /path/to/joiner.ncnn.bin \\\n"
  33. " /path/to/foo.wav [<num_threads> [decode_method, can be "
  34. "greedy_search/modified_beam_search]]"
  35. "\n\n"
  36. "Please refer to \n"
  37. "https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html\n"
  38. "for a list of pre-trained models to download.\n";
  39. int32_t main(int32_t argc, char *argv[]) {
  40. if (argc < 9 || argc > 11) {
  41. fprintf(stderr, "%s\n", kUsage);
  42. return -1;
  43. }
  44. SherpaNcnnModelConfig model_config;
  45. model_config.tokens = argv[1];
  46. model_config.encoder_param = argv[2];
  47. model_config.encoder_bin = argv[3];
  48. model_config.decoder_param = argv[4];
  49. model_config.decoder_bin = argv[5];
  50. model_config.joiner_param = argv[6];
  51. model_config.joiner_bin = argv[7];
  52. int32_t num_threads = 4;
  53. if (argc >= 10 && atoi(argv[9]) > 0) {
  54. num_threads = atoi(argv[9]);
  55. }
  56. model_config.num_threads = num_threads;
  57. model_config.use_vulkan_compute = 0;
  58. SherpaNcnnDecoderConfig decoder_config;
  59. decoder_config.decoding_method = "greedy_search";
  60. if (argc == 11) {
  61. decoder_config.decoding_method = argv[10];
  62. }
  63. decoder_config.num_active_paths = 4;
  64. decoder_config.enable_endpoint = 0;
  65. decoder_config.rule1_min_trailing_silence = 2.4;
  66. decoder_config.rule2_min_trailing_silence = 1.2;
  67. decoder_config.rule3_min_utterance_length = 300;
  68. SherpaNcnnRecognizer *recognizer =
  69. CreateRecognizer(&model_config, &decoder_config);
  70. const char *wav_filename = argv[8];
  71. FILE *fp = fopen(wav_filename, "rb");
  72. if (!fp) {
  73. fprintf(stderr, "Failed to open %s\n", wav_filename);
  74. return -1;
  75. }
  76. // Assume the wave header occupies 44 bytes.
  77. fseek(fp, 44, SEEK_SET);
  78. // simulate streaming
  79. #define N 3200 // 0.2 s. Sample rate is fixed to 16 kHz
  80. int16_t buffer[N];
  81. float samples[N];
  82. while (!feof(fp)) {
  83. size_t n = fread((void *)buffer, sizeof(int16_t), N, fp);
  84. if (n > 0) {
  85. for (size_t i = 0; i != n; ++i) {
  86. samples[i] = buffer[i] / 32768.;
  87. }
  88. AcceptWaveform(recognizer, 16000, samples, n);
  89. Decode(recognizer);
  90. SherpaNcnnResult *r = GetResult(recognizer);
  91. if (strlen(r->text)) {
  92. fprintf(stderr, "%s\n", r->text);
  93. }
  94. DestroyResult(r);
  95. }
  96. }
  97. fclose(fp);
  98. // add some tail padding
  99. float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate
  100. AcceptWaveform(recognizer, 16000, tail_paddings, 4800);
  101. InputFinished(recognizer);
  102. Decode(recognizer);
  103. SherpaNcnnResult *r = GetResult(recognizer);
  104. fprintf(stderr, "%s\n", r->text);
  105. DestroyResult(r);
  106. DestroyRecognizer(recognizer);
  107. return 0;
  108. }