// decode-file.swift
  1. import AVFoundation
  2. extension AudioBuffer {
  3. func array() -> [Float] {
  4. return Array(UnsafeBufferPointer(self))
  5. }
  6. }
  7. extension AVAudioPCMBuffer {
  8. func array() -> [Float] {
  9. return self.audioBufferList.pointee.mBuffers.array()
  10. }
  11. }
  12. func run() {
  13. let encoderParam =
  14. "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param"
  15. let encoderBin =
  16. "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin"
  17. let decoderParam =
  18. "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param"
  19. let decoderBin =
  20. "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin"
  21. let joinerParam =
  22. "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param"
  23. let joinerBin = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin"
  24. let tokens = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt"
  25. var modelConfig = sherpaNcnnModelConfig(
  26. encoderParam: encoderParam,
  27. encoderBin: encoderBin,
  28. decoderParam: decoderParam,
  29. decoderBin: decoderBin,
  30. joinerParam: joinerParam,
  31. joinerBin: joinerBin,
  32. tokens: tokens,
  33. numThreads: 4)
  34. var decoderConfig = sherpaNcnnDecoderConfig(
  35. decodingMethod: "modified_beam_search",
  36. numActivePaths: 4
  37. )
  38. let recognizer = SherpaNcnnRecognizer(
  39. modelConfig: &modelConfig,
  40. decoderConfig: &decoderConfig)
  41. let filePath = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06/test_wavs/0.wav"
  42. let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  43. let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  44. let audioFormat = audioFile.processingFormat
  45. assert(audioFormat.sampleRate == 16000)
  46. assert(audioFormat.channelCount == 1)
  47. assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
  48. let audioFrameCount = UInt32(audioFile.length)
  49. let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  50. try! audioFile.read(into: audioFileBuffer!)
  51. let array: [Float]! = audioFileBuffer?.array()
  52. recognizer.acceptWaveform(samples: array)
  53. let tailPadding = [Float](repeating: 0.0, count: 3200)
  54. recognizer.acceptWaveform(samples: tailPadding)
  55. recognizer.inputFinished()
  56. recognizer.decode()
  57. let result = recognizer.getResult()
  58. print("\nresult is:\n\(result.text)")
  59. }
  60. @main
  61. struct App {
  62. static func main() {
  63. run()
  64. }
  65. }