decode-file.swift 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. import AVFoundation
  extension AudioBuffer {
    /// Returns a copy of this buffer's contents as an array of `Float`.
    ///
    /// NOTE(review): the memory is viewed as `Float` without any format check;
    /// presumably callers only use this on 32-bit float PCM data — confirm.
    func array() -> [Float] {
      let samples = UnsafeBufferPointer<Float>(self)
      return [Float](samples)
    }
  }
  extension AVAudioPCMBuffer {
    /// Returns the contents of this PCM buffer's `audioBufferList.mBuffers` as `[Float]`.
    ///
    /// NOTE(review): only `mBuffers` is read, so for layouts with more than one
    /// buffer this presumably yields just the first one — confirm before using
    /// it with multi-channel audio.
    func array() -> [Float] {
      let firstBuffer = audioBufferList.pointee.mBuffers
      return firstBuffer.array()
    }
  }
  /// Transcribes a test recording with a sherpa-ncnn recognizer and prints the text.
  ///
  /// The model files, the token table and `test_wavs/0.wav` are all loaded from
  /// `./sherpa-ncnn-conv-emformer-transducer-2022-12-06`, resolved against the
  /// current working directory. If the audio cannot be opened, read, or does not
  /// match the expected format (16 kHz, mono, Float32), the process is terminated
  /// with a message describing what went wrong.
  func run() {
    // Every asset lives in the same directory; name it once instead of repeating it.
    let modelDir = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"

    let featConfig = sherpaNcnnFeatureExtractorConfig(
      sampleRate: 16000,
      featureDim: 80
    )
    let modelConfig = sherpaNcnnModelConfig(
      encoderParam: "\(modelDir)/encoder_jit_trace-pnnx.ncnn.param",
      encoderBin: "\(modelDir)/encoder_jit_trace-pnnx.ncnn.bin",
      decoderParam: "\(modelDir)/decoder_jit_trace-pnnx.ncnn.param",
      decoderBin: "\(modelDir)/decoder_jit_trace-pnnx.ncnn.bin",
      joinerParam: "\(modelDir)/joiner_jit_trace-pnnx.ncnn.param",
      joinerBin: "\(modelDir)/joiner_jit_trace-pnnx.ncnn.bin",
      tokens: "\(modelDir)/tokens.txt",
      numThreads: 4
    )
    let decoderConfig = sherpaNcnnDecoderConfig(
      decodingMethod: "modified_beam_search",
      numActivePaths: 4
    )
    // `var` because the recognizer takes the configuration as `inout`.
    var config = sherpaNcnnRecognizerConfig(
      featConfig: featConfig,
      modelConfig: modelConfig,
      decoderConfig: decoderConfig
    )
    let recognizer = SherpaNcnnRecognizer(config: &config)

    let filePath = "\(modelDir)/test_wavs/0.wav"
    let fileURL = URL(fileURLWithPath: filePath)

    let audioFile: AVAudioFile
    do {
      audioFile = try AVAudioFile(forReading: fileURL)
    } catch {
      fatalError("Failed to open \(filePath): \(error)")
    }

    // `precondition` rather than `assert`: these checks must also run in
    // optimized builds, otherwise audio in the wrong format would be passed
    // to the recognizer without any warning.
    let audioFormat = audioFile.processingFormat
    precondition(
      audioFormat.sampleRate == 16000,
      "Expected a 16000 Hz sample rate, got \(audioFormat.sampleRate)")
    precondition(
      audioFormat.channelCount == 1,
      "Expected mono audio, got \(audioFormat.channelCount) channels")
    precondition(
      audioFormat.commonFormat == .pcmFormatFloat32,
      "Expected Float32 PCM samples")

    // `exactly:` rejects a frame count that does not fit the buffer's capacity
    // type instead of trapping inside the integer conversion.
    guard
      let audioFrameCount = AVAudioFrameCount(exactly: audioFile.length),
      let audioFileBuffer = AVAudioPCMBuffer(
        pcmFormat: audioFormat, frameCapacity: audioFrameCount)
    else {
      fatalError("Cannot allocate a PCM buffer for \(audioFile.length) frames of \(filePath)")
    }

    do {
      try audioFile.read(into: audioFileBuffer)
    } catch {
      fatalError("Failed to read \(filePath): \(error)")
    }

    let samples: [Float] = audioFileBuffer.array()
    recognizer.acceptWaveform(samples: samples)

    // 3200 zero samples = 0.2 s of silence at 16 kHz appended after the audio.
    // NOTE(review): presumably lets the model emit its final frames — confirm.
    let tailPadding = [Float](repeating: 0.0, count: 3200)
    recognizer.acceptWaveform(samples: tailPadding)
    recognizer.inputFinished()

    while recognizer.isReady() {
      recognizer.decode()
    }

    let result = recognizer.getResult()
    print("\nresult is:\n\(result.text)")
  }
  /// Program entry point.
  @main
  struct App {
    /// Invoked at launch; hands control straight to `run()`.
    static func main() { run() }
  }