Program.cs 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2. using System;
  3. using System.Threading;
  4. using PortAudioSharp;
  5. using System.Runtime.InteropServices;
  /// <summary>
  /// Console demo that captures audio from the default PortAudio input device,
  /// feeds it to a sherpa-ncnn online (streaming) recognizer and prints the
  /// recognized text segment by segment until the user presses Ctrl+C.
  /// </summary>
  class Microphone
  {
      // Sample rate (Hz) shared by the feature extractor config, the capture
      // stream and every AcceptWaveform call; they must all agree.
      private const int SampleRate = 16000;

      // Interval between two polls of the recognizer in the main loop.
      private const int PollIntervalMs = 200;

      private const string Usage = @"
./microphone.exe \
/path/to/tokens.txt \
/path/to/encoder.ncnn.param \
/path/to/encoder.ncnn.bin \
/path/to/decoder.ncnn.param \
/path/to/decoder.ncnn.bin \
/path/to/joiner.ncnn.param \
/path/to/joiner.ncnn.bin \
[<num_threads> [decode_method]]
num_threads: Default to 1
decoding_method: greedy_search (default), or modified_beam_search
Please refer to
https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
for a list of pre-trained models to download.
";

      /// <summary>
      /// Program entry point.
      /// </summary>
      /// <param name="args">
      /// Seven model file paths (tokens, encoder param/bin, decoder param/bin,
      /// joiner param/bin), optionally followed by the number of threads and
      /// the decoding method.
      /// </param>
      public static void Main(String[] args)
      {
          if (args.Length < 7 || args.Length > 9)
          {
              Console.WriteLine(Usage);
              return;
          }

          SherpaNcnn.OnlineRecognizerConfig config;
          if (!TryBuildConfig(args, out config))
          {
              Console.WriteLine(Usage);
              Environment.ExitCode = 1;
              return;
          }

          SherpaNcnn.OnlineRecognizer recognizer = new SherpaNcnn.OnlineRecognizer(config);
          SherpaNcnn.OnlineStream s = recognizer.CreateStream();

          Console.WriteLine(PortAudio.VersionInfo.versionText);
          PortAudio.Initialize();
          try
          {
              PrintDevices();

              int deviceIndex = PortAudio.DefaultInputDevice;
              if (deviceIndex == PortAudio.NoDevice)
              {
                  Console.WriteLine("No default input device found");
                  // Set the exit code and return instead of Environment.Exit(1):
                  // Exit would skip the finally block and leave PortAudio
                  // initialized.
                  Environment.ExitCode = 1;
                  return;
              }

              CaptureAndRecognize(recognizer, s, deviceIndex);
          }
          finally
          {
              // Always paired with the successful Initialize() above.
              PortAudio.Terminate();
          }
      }

      /// <summary>
      /// Builds the recognizer configuration from the command-line arguments.
      /// </summary>
      /// <param name="args">Validated to contain between 7 and 9 entries by the caller.</param>
      /// <param name="config">Receives the configuration; only meaningful when true is returned.</param>
      /// <returns>false when the optional num_threads argument is not a positive integer.</returns>
      private static bool TryBuildConfig(String[] args, out SherpaNcnn.OnlineRecognizerConfig config)
      {
          config = new SherpaNcnn.OnlineRecognizerConfig();
          config.FeatConfig.SampleRate = SampleRate;
          config.FeatConfig.FeatureDim = 80;

          config.ModelConfig.Tokens = args[0];
          config.ModelConfig.EncoderParam = args[1];
          config.ModelConfig.EncoderBin = args[2];
          config.ModelConfig.DecoderParam = args[3];
          config.ModelConfig.DecoderBin = args[4];
          config.ModelConfig.JoinerParam = args[5];
          config.ModelConfig.JoinerBin = args[6];
          config.ModelConfig.UseVulkanCompute = 0;
          config.ModelConfig.NumThreads = 1;

          if (args.Length >= 8)
          {
              int numThreads;
              bool parsed = Int32.TryParse(
                  args[7],
                  System.Globalization.NumberStyles.Integer,
                  System.Globalization.CultureInfo.InvariantCulture,
                  out numThreads);

              // Report a malformed or non-positive value instead of crashing
              // with an unhandled FormatException.
              if (!parsed || numThreads < 1)
              {
                  Console.WriteLine($"Invalid num_threads: {args[7]}");
                  return false;
              }

              config.ModelConfig.NumThreads = numThreads;
              if (numThreads > 1)
              {
                  Console.WriteLine($"Use num_threads: {config.ModelConfig.NumThreads}");
              }
          }

          config.DecoderConfig.DecodingMethod = "greedy_search";
          if (args.Length == 9 && args[8] != "greedy_search")
          {
              Console.WriteLine($"Use decoding_method {args[8]}");
              config.DecoderConfig.DecodingMethod = args[8];
          }
          config.DecoderConfig.NumActivePaths = 4;

          config.EnableEndpoint = 1;
          config.Rule1MinTrailingSilence = 2.4F;
          config.Rule2MinTrailingSilence = 1.2F;
          config.Rule3MinUtteranceLength = 20.0F;
          return true;
      }

      /// <summary>
      /// Prints the name, input channel count and default sample rate of every
      /// device reported by PortAudio.
      /// </summary>
      private static void PrintDevices()
      {
          int deviceCount = PortAudio.DeviceCount;
          Console.WriteLine($"Number of devices: {deviceCount}");
          for (int i = 0; i < deviceCount; ++i)
          {
              Console.WriteLine($" Device {i}");
              DeviceInfo deviceInfo = PortAudio.GetDeviceInfo(i);
              Console.WriteLine($" Name: {deviceInfo.name}");
              Console.WriteLine($" Max input channels: {deviceInfo.maxInputChannels}");
              Console.WriteLine($" Default sample rate: {deviceInfo.defaultSampleRate}");
          }
      }

      /// <summary>
      /// Opens a mono float32 capture stream on the given device, forwards every
      /// captured buffer to the recognizer stream and runs the decoding loop
      /// until Ctrl+C is pressed. The capture stream is stopped and disposed
      /// before returning.
      /// </summary>
      private static void CaptureAndRecognize(
          SherpaNcnn.OnlineRecognizer recognizer,
          SherpaNcnn.OnlineStream s,
          int deviceIndex)
      {
          DeviceInfo info = PortAudio.GetDeviceInfo(deviceIndex);
          Console.WriteLine();
          Console.WriteLine($"Use default device {deviceIndex} ({info.name})");

          StreamParameters param = new StreamParameters();
          param.device = deviceIndex;
          param.channelCount = 1;
          param.sampleFormat = SampleFormat.Float32;
          param.suggestedLatency = info.defaultLowInputLatency;
          param.hostApiSpecificStreamInfo = IntPtr.Zero;

          // Invoked by PortAudio for each captured buffer: copy the native
          // float samples into a managed array and hand them to the recognizer.
          PortAudioSharp.Stream.Callback callback = (IntPtr input, IntPtr output,
              UInt32 frameCount,
              ref StreamCallbackTimeInfo timeInfo,
              StreamCallbackFlags statusFlags,
              IntPtr userData
              ) =>
          {
              float[] samples = new float[frameCount];
              Marshal.Copy(input, samples, 0, (Int32)frameCount);
              s.AcceptWaveform(SampleRate, samples);
              return StreamCallbackResult.Continue;
          };

          using (PortAudioSharp.Stream stream = new PortAudioSharp.Stream(inParams: param, outParams: null, sampleRate: SampleRate,
              framesPerBuffer: 0,
              streamFlags: StreamFlags.ClipOff,
              callback: callback,
              userData: IntPtr.Zero
              ))
          using (CancellationTokenSource cts = new CancellationTokenSource())
          {
              // Turn Ctrl+C into a cooperative stop request so the stream and
              // PortAudio are shut down cleanly instead of the process dying.
              ConsoleCancelEventHandler onCancel = (sender, e) =>
              {
                  e.Cancel = true;
                  cts.Cancel();
              };

              Console.WriteLine(param);
              Console.WriteLine("Started! Please speak\n\n");

              stream.Start();
              Console.CancelKeyPress += onCancel;
              try
              {
                  RecognizeUntilCancelled(recognizer, s, cts.Token);
              }
              finally
              {
                  // Unsubscribe before cts is disposed so a late Ctrl+C cannot
                  // touch a disposed CancellationTokenSource.
                  Console.CancelKeyPress -= onCancel;
                  stream.Stop();
              }
          }
      }

      /// <summary>
      /// Polls the recognizer every <see cref="PollIntervalMs"/> milliseconds,
      /// printing the running result of the current segment on one console
      /// line and starting a new line whenever an endpoint is detected.
      /// Returns once <paramref name="token"/> is cancelled.
      /// </summary>
      private static void RecognizeUntilCancelled(
          SherpaNcnn.OnlineRecognizer recognizer,
          SherpaNcnn.OnlineStream s,
          CancellationToken token)
      {
          String lastText = "";
          int segmentIndex = 0;

          while (!token.IsCancellationRequested)
          {
              while (recognizer.IsReady(s))
              {
                  recognizer.Decode(s);
              }

              var text = recognizer.GetResult(s).Text;
              bool isEndpoint = recognizer.IsEndpoint(s);
              bool hasText = !string.IsNullOrWhiteSpace(text);

              if (hasText && lastText != text)
              {
                  lastText = text;
                  Console.Write($"\r{segmentIndex}: {lastText}");
              }

              if (isEndpoint)
              {
                  if (hasText)
                  {
                      ++segmentIndex;
                      Console.WriteLine();
                  }
                  recognizer.Reset(s);
                  // Forget the finished segment; otherwise a new segment whose
                  // first result equals the previous text would not be printed.
                  lastText = "";
              }

              // Sleeps for the poll interval but wakes immediately on Ctrl+C.
              token.WaitHandle.WaitOne(PollIntervalMs);
          }

          // Leave the cursor on a fresh line after the last partial result.
          Console.WriteLine();
      }
  }