app.js 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. // This file copies and modifies code
  2. // from https://mdn.github.io/web-dictaphone/scripts/app.js
  3. // and https://gist.github.com/meziantou/edb7217fddfbb70e899e
  // Page controls, looked up once by element id.
  const startBtn = document.getElementById('startBtn');
  const stopBtn = document.getElementById('stopBtn');
  const clearBtn = document.getElementById('clearBtn');

  // Status line and the container that receives recorded clips.
  const hint = document.getElementById('hint');
  const soundClips = document.getElementById('sound-clips');

  // Text area the transcript is rendered into.
  let textArea = document.getElementById('results');

  // Transcript state: `lastResult` holds the latest text of the utterance
  // still being decoded; `resultList` collects the texts that were pushed once
  // the recognizer reported an endpoint.
  let lastResult = '';
  let resultList = [];
  // Clear button: drop every stored utterance and redraw the text area.
  // `lastResult` is not touched here, so an utterance that is still being
  // decoded stays visible.
  clearBtn.onclick = () => {
    resultList = [];
    const rendered = getDisplayResult();
    textArea.value = rendered;
    textArea.scrollTop = textArea.scrollHeight;  // auto scroll
  };
  // Builds the text shown in the results area.
  //
  // Each non-empty entry of the global `resultList` becomes one line of the
  // form "<index>: <text>\n". Indices start at 0 and empty entries are skipped
  // without consuming an index. When the global `lastResult` is non-empty it
  // is appended as a final line carrying the next index.
  //
  // Returns the assembled string ('' when there is nothing to show).
  function getDisplayResult() {
    let ans = '';
    let i = 0;
    // for...of visits only the array elements; for...in would also walk any
    // enumerable property added to the array or inherited from its prototype.
    for (const text of resultList) {
      if (text === '') {
        continue;
      }
      ans += `${i}: ${text}\n`;
      i += 1;
    }
    if (lastResult.length > 0) {
      ans += `${i}: ${lastResult}\n`;
    }
    return ans;
  }
  // Global `Module` object carrying the runtime-ready hook.
  // NOTE(review): presumably consumed by a WebAssembly loader script that is
  // included after this file - confirm against the page that loads it.
  Module = {
    // Invoked once the runtime is ready: update the hint, enable the start
    // button and build the recognizer used by the audio callback.
    onRuntimeInitialized() {
      console.log('inited!');
      hint.innerText = 'Model loaded! Please click start';
      startBtn.disabled = false;

      recognizer = createRecognizer(Module);
      console.log('recognizer is created!', recognizer);
    },
  };
  // ---- audio capture state ----

  // AudioContext, created the first time the microphone stream is delivered.
  let audioCtx;
  // Source node created from the microphone stream.
  let mediaStream;
  // Sample rate handed to the recognizer and written into exported WAV files.
  let expectedSampleRate = 16000;
  // the sampleRate of the microphone (read from audioCtx.sampleRate)
  let recordSampleRate;
  // the microphone (the script-processor node that delivers audio buffers)
  let recorder = null;
  // Recorded 16-bit chunks. TODO: Use a single channel
  let leftchannel = [];
  // number of samples so far
  let recordingLength = 0;

  // ---- recognizer state ----

  // Recognizer instance, assigned once the runtime has been initialized.
  let recognizer = null;
  // Stream object created from the recognizer on the first audio callback.
  let recognizer_stream = null;
  // Microphone wiring: request the microphone, feed every captured buffer to
  // the recognizer, keep a 16-bit copy for WAV export, and hook up the
  // start/stop buttons.
  //
  // `navigator.mediaDevices` is tested before `getUserMedia` is read from it,
  // because the property is undefined in insecure contexts; without that guard
  // the feature test itself would throw and the fallback branch would never
  // run.
  if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
    console.log('getUserMedia supported.');

    // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
    const constraints = {audio: true};

    // Called with the microphone MediaStream once access has been granted.
    const onSuccess = function(stream) {
      if (!audioCtx) {
        // Request the recognizer's rate so that resampling is normally a no-op.
        audioCtx = new AudioContext({sampleRate: expectedSampleRate});
      }
      console.log(audioCtx);
      recordSampleRate = audioCtx.sampleRate;
      console.log('sample rate ' + recordSampleRate);

      // creates an audio node from the microphone incoming stream
      mediaStream = audioCtx.createMediaStreamSource(stream);
      console.log('media stream', mediaStream);

      // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
      // bufferSize: the onaudioprocess event is called when the buffer is full
      const bufferSize = 4096;
      const numberOfInputChannels = 1;
      const numberOfOutputChannels = 2;
      if (audioCtx.createScriptProcessor) {
        recorder = audioCtx.createScriptProcessor(
            bufferSize, numberOfInputChannels, numberOfOutputChannels);
      } else {
        recorder = audioCtx.createJavaScriptNode(
            bufferSize, numberOfInputChannels, numberOfOutputChannels);
      }
      console.log('recorder', recorder);

      // Runs for every full capture buffer: decode, refresh the transcript,
      // then store a 16-bit copy of the audio.
      recorder.onaudioprocess = function(e) {
        // Copy the channel data before processing it.
        let samples = new Float32Array(e.inputBuffer.getChannelData(0));
        samples = downsampleBuffer(samples, expectedSampleRate);

        if (recognizer_stream == null) {
          recognizer_stream = recognizer.createStream();
        }
        recognizer_stream.acceptWaveform(expectedSampleRate, samples);
        while (recognizer.isReady(recognizer_stream)) {
          recognizer.decode(recognizer_stream);
        }

        const isEndpoint = recognizer.isEndpoint(recognizer_stream);
        const result = recognizer.getResult(recognizer_stream);
        if (result.length > 0 && lastResult !== result) {
          lastResult = result;
        }

        if (isEndpoint) {
          // The utterance is complete: store it and start a fresh one.
          if (lastResult.length > 0) {
            resultList.push(lastResult);
            lastResult = '';
          }
          recognizer.reset(recognizer_stream);
        }

        textArea.value = getDisplayResult();
        textArea.scrollTop = textArea.scrollHeight;  // auto scroll

        // Clamp to [-1, 1] and scale to the signed 16-bit range.
        const buf = new Int16Array(samples.length);
        for (let i = 0; i < samples.length; ++i) {
          const s = Math.max(-1, Math.min(1, samples[i]));
          buf[i] = s * 32767;
        }
        leftchannel.push(buf);
        // Count what was actually stored; after resampling this can differ
        // from bufferSize.
        recordingLength += buf.length;
      };

      startBtn.onclick = function() {
        mediaStream.connect(recorder);
        recorder.connect(audioCtx.destination);
        console.log('recorder started');

        stopBtn.disabled = false;
        startBtn.disabled = true;
      };

      stopBtn.onclick = function() {
        console.log('recorder stopped');

        // Detach the processing node so onaudioprocess stops firing.
        recorder.disconnect(audioCtx.destination);
        mediaStream.disconnect(recorder);

        startBtn.style.background = '';
        startBtn.style.color = '';

        stopBtn.disabled = true;
        startBtn.disabled = false;

        // Build the clip entry: <article class="clip"> audio, label, delete.
        const clipName = new Date().toISOString();
        const clipContainer = document.createElement('article');
        const clipLabel = document.createElement('p');
        const audio = document.createElement('audio');
        const deleteButton = document.createElement('button');

        clipContainer.classList.add('clip');
        audio.controls = true;
        deleteButton.textContent = 'Delete';
        deleteButton.className = 'delete';
        clipLabel.textContent = clipName;

        clipContainer.appendChild(audio);
        clipContainer.appendChild(clipLabel);
        clipContainer.appendChild(deleteButton);
        soundClips.appendChild(clipContainer);

        // Turn everything recorded since the last stop into one WAV blob and
        // reset the recording buffers for the next take.
        const samples = flatten(leftchannel);
        const blob = toWav(samples);
        leftchannel = [];
        recordingLength = 0;

        const audioURL = window.URL.createObjectURL(blob);
        audio.src = audioURL;

        deleteButton.onclick = function() {
          clipContainer.parentNode.removeChild(clipContainer);
          // Release the blob behind the object URL; otherwise it stays alive
          // until the page is unloaded.
          window.URL.revokeObjectURL(audioURL);
        };

        clipLabel.onclick = function() {
          const newClipName = prompt('Enter a new name for your sound clip?');
          // prompt() returns null on cancel: keep the current name then.
          if (newClipName !== null) {
            clipLabel.textContent = newClipName;
          }
        };
      };
    };

    const onError = function(err) {
      console.log('The following error occured: ' + err);
    };

    navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
  } else {
    console.log('getUserMedia not supported on your browser!');
    alert('getUserMedia not supported on your browser!');
  }
  // Concatenates a list of sample chunks into a single Int16Array, keeping
  // the chunks in order.
  //
  // this function is copied/modified from
  // https://gist.github.com/meziantou/edb7217fddfbb70e899e
  //
  // listOfSamples: array of chunks (each with a `length`, accepted by
  //                Int16Array.prototype.set).
  // Returns a new Int16Array whose length is the sum of the chunk lengths.
  function flatten(listOfSamples) {
    const total =
        listOfSamples.reduce((sum, chunk) => sum + chunk.length, 0);
    const ans = new Int16Array(total);

    let writePos = 0;
    for (const chunk of listOfSamples) {
      ans.set(chunk, writePos);
      writePos += chunk.length;
    }
    return ans;
  }
  // Encodes 16-bit samples as a mono PCM WAV file.
  //
  // this function is copied/modified from
  // https://gist.github.com/meziantou/edb7217fddfbb70e899e
  // Header layout: http://soundfile.sapp.org/doc/WaveFormat/
  //
  // samples:    indexable sequence of signed 16-bit sample values.
  // sampleRate: rate written into the header; optional, defaults to the
  //             global `expectedSampleRate`.
  // Returns a Blob of type 'audio/wav' (44-byte header followed by the
  // little-endian sample data).
  function toWav(samples, sampleRate = expectedSampleRate) {
    const numChannels = 1;
    const bytesPerSample = 2;
    const headerSize = 44;
    const dataSize = samples.length * bytesPerSample;

    const view = new DataView(new ArrayBuffer(headerSize + dataSize));

    // The four-character tags are written as little-endian integers, so the
    // constants hold the characters in reverse order.
    view.setUint32(0, 0x46464952, true);     // chunkID "RIFF"
    view.setUint32(4, 36 + dataSize, true);  // chunkSize
    view.setUint32(8, 0x45564157, true);     // format "WAVE"

    view.setUint32(12, 0x20746d66, true);  // subchunk1ID "fmt "
    view.setUint32(16, 16, true);          // subchunk1Size, 16 for PCM
    // audioFormat is a 2-byte field; a 4-byte write here would spill into
    // numChannels.
    view.setUint16(20, 1, true);            // audioFormat, 1 for PCM
    view.setUint16(22, numChannels, true);  // numChannels
    view.setUint32(24, sampleRate, true);   // sampleRate
    view.setUint32(
        28, sampleRate * numChannels * bytesPerSample, true);  // byteRate
    view.setUint16(32, numChannels * bytesPerSample, true);    // blockAlign
    view.setUint16(34, 8 * bytesPerSample, true);              // bitsPerSample

    view.setUint32(36, 0x61746164, true);  // subchunk2ID "data"
    view.setUint32(40, dataSize, true);    // subchunk2Size

    let offset = headerSize;
    for (let i = 0; i < samples.length; ++i) {
      view.setInt16(offset, samples[i], true);
      offset += bytesPerSample;
    }

    return new Blob([view], {type: 'audio/wav'});
  }
  // Resamples `buffer` to `exportSampleRate` by averaging, for each output
  // sample, the input samples that fall into its window.
  //
  // this function is copied from
  // https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
  //
  // buffer:           Float32Array of input samples.
  // exportSampleRate: target sample rate.
  // inputSampleRate:  rate of `buffer`; optional, defaults to the global
  //                   `recordSampleRate`.
  // Returns `buffer` itself when both rates are equal, otherwise a new
  // Float32Array of Math.round(buffer.length / ratio) samples.
  function downsampleBuffer(
      buffer, exportSampleRate, inputSampleRate = recordSampleRate) {
    if (exportSampleRate === inputSampleRate) {
      return buffer;
    }

    const sampleRateRatio = inputSampleRate / exportSampleRate;
    const newLength = Math.round(buffer.length / sampleRateRatio);
    const result = new Float32Array(newLength);

    let offsetBuffer = 0;
    // Value reused when a window contains no input sample. That happens
    // whenever the target rate is higher than the input rate; dividing by a
    // zero count would otherwise store NaN.
    let held = buffer.length > 0 ? buffer[0] : 0;

    for (let offsetResult = 0; offsetResult < result.length; ++offsetResult) {
      const nextOffsetBuffer =
          Math.round((offsetResult + 1) * sampleRateRatio);
      const end = Math.min(nextOffsetBuffer, buffer.length);

      let accum = 0;
      let count = 0;
      for (let i = offsetBuffer; i < end; ++i) {
        accum += buffer[i];
        count++;
      }
      if (count > 0) {
        held = accum / count;
      }
      result[offsetResult] = held;
      offsetBuffer = nextOffsetBuffer;
    }
    return result;
  }