JacobLinCool Codex committed on
Commit
73bbb0c
·
verified ·
1 Parent(s): ca766b5

fix: make voice recording stoppable

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (3) hide show
  1. static/app.js +64 -18
  2. static/styles.css +10 -0
  3. tests/test_frontend_copy.py +4 -0
static/app.js CHANGED
@@ -50,6 +50,7 @@ let voiceBusy = false;
50
  let voiceRecorder = null;
51
  let voiceStream = null;
52
  let voiceChunks = [];
 
53
 
54
  bootstrap().catch(handleBootstrapError);
55
 
@@ -99,7 +100,7 @@ recordVoiceButton.addEventListener("click", async () => {
99
  });
100
 
101
  uploadVoiceButton.addEventListener("click", () => {
102
- if (uploadVoiceButton.disabled || voiceBusy || sessionControlsLocked) return;
103
  voiceFileInput.click();
104
  });
105
 
@@ -234,19 +235,23 @@ async function runCommand(command) {
234
  }
235
 
236
  async function toggleVoiceRecording() {
237
- if (voiceRecorder?.state === "recording") {
238
- voiceRecorder.stop();
239
  return;
240
  }
 
241
  await startVoiceRecording();
242
  }
243
 
244
  async function startVoiceRecording() {
245
- if (sessionControlsLocked || voiceBusy) return;
246
  if (!navigator.mediaDevices?.getUserMedia || !window.MediaRecorder) {
247
  setSessionStatus("Voice recording is not available in this browser. Upload a voice note instead.");
248
  return;
249
  }
 
 
 
250
  try {
251
  voiceStream = await navigator.mediaDevices.getUserMedia({ audio: true });
252
  voiceChunks = [];
@@ -257,32 +262,57 @@ async function startVoiceRecording() {
257
  });
258
  voiceRecorder.addEventListener("stop", () => {
259
  const recorderMimeType = voiceRecorder?.mimeType || mimeType || "audio/webm";
 
260
  stopVoiceStream();
261
  const extension = recorderMimeType.includes("mp4")
262
  ? "m4a"
263
  : recorderMimeType.includes("ogg")
264
  ? "ogg"
265
  : "webm";
266
- const blob = new Blob(voiceChunks, { type: recorderMimeType });
267
  voiceRecorder = null;
268
  voiceChunks = [];
269
- setActionButtonLabel(recordVoiceButton, "Speak");
270
- setVoiceControlsDisabled(false);
 
 
 
 
 
 
271
  transcribeVoiceBlob(blob, `recorded-idea.${extension}`);
272
  });
273
  voiceRecorder.start();
274
- setActionButtonLabel(recordVoiceButton, "Stop");
275
- setVoiceControlsDisabled(false);
276
  setSessionStatus("Listening. Press Stop when your idea is ready.");
277
  } catch (error) {
278
  stopVoiceStream();
279
  voiceRecorder = null;
280
- setActionButtonLabel(recordVoiceButton, "Speak");
281
- setVoiceControlsDisabled(false);
 
 
282
  setSessionStatus(`Voice recording could not start: ${error.message}`);
283
  }
284
  }
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  function recordingMimeType() {
287
  const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/mp4"];
288
  return candidates.find((type) => MediaRecorder.isTypeSupported(type)) || "";
@@ -296,18 +326,18 @@ function stopVoiceStream() {
296
 
297
  async function transcribeVoiceBlob(blob, filename) {
298
  if (sessionControlsLocked || voiceBusy) return false;
 
299
  if (!blob?.size) {
300
  setSessionStatus("Voice note is empty.");
301
  return false;
302
  }
303
  const revision = bumpSessionRevision();
304
  voiceBusy = true;
 
305
  submit.disabled = true;
306
  input.disabled = true;
307
  setCommandDisabled(true);
308
  setSessionControlsDisabled(true);
309
- setVoiceControlsDisabled(true);
310
- setActionButtonLabel(recordVoiceButton, "Hearing...");
311
  setSessionStatus("Transcribing voice note.");
312
  try {
313
  const formData = new FormData();
@@ -331,13 +361,12 @@ async function transcribeVoiceBlob(blob, filename) {
331
  return false;
332
  } finally {
333
  voiceBusy = false;
334
- setActionButtonLabel(recordVoiceButton, "Speak");
335
  if (isCurrentSessionRevision(revision)) {
336
  submit.disabled = false;
337
  input.disabled = false;
338
  setSessionControlsDisabled(false);
339
  setCommandDisabled(false);
340
- setVoiceControlsDisabled(false);
341
  input.focus();
342
  }
343
  }
@@ -486,9 +515,26 @@ function setSessionControlsDisabled(disabled) {
486
  }
487
 
488
  function setVoiceControlsDisabled(disabled) {
489
- const recording = voiceRecorder?.state === "recording";
490
- recordVoiceButton.disabled = voiceBusy || (disabled && !recording) || !bootstrapData;
491
- uploadVoiceButton.disabled = voiceBusy || recording || disabled || !bootstrapData;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  }
493
 
494
  function resetSession() {
 
50
  let voiceRecorder = null;
51
  let voiceStream = null;
52
  let voiceChunks = [];
53
+ let voiceRecordingState = "idle";
54
 
55
  bootstrap().catch(handleBootstrapError);
56
 
 
100
  });
101
 
102
  uploadVoiceButton.addEventListener("click", () => {
103
+ if (uploadVoiceButton.disabled || voiceBusy || sessionControlsLocked || voiceRecordingState !== "idle") return;
104
  voiceFileInput.click();
105
  });
106
 
 
235
  }
236
 
237
  async function toggleVoiceRecording() {
238
+ if (voiceRecordingState === "recording" && voiceRecorder?.state === "recording") {
239
+ stopVoiceRecording();
240
  return;
241
  }
242
+ if (voiceRecordingState !== "idle") return;
243
  await startVoiceRecording();
244
  }
245
 
246
  async function startVoiceRecording() {
247
+ if (sessionControlsLocked || voiceBusy || voiceRecordingState !== "idle") return;
248
  if (!navigator.mediaDevices?.getUserMedia || !window.MediaRecorder) {
249
  setSessionStatus("Voice recording is not available in this browser. Upload a voice note instead.");
250
  return;
251
  }
252
+ setVoiceRecordingState("starting");
253
+ submit.disabled = true;
254
+ setCommandDisabled(true);
255
  try {
256
  voiceStream = await navigator.mediaDevices.getUserMedia({ audio: true });
257
  voiceChunks = [];
 
262
  });
263
  voiceRecorder.addEventListener("stop", () => {
264
  const recorderMimeType = voiceRecorder?.mimeType || mimeType || "audio/webm";
265
+ const recordedChunks = voiceChunks;
266
  stopVoiceStream();
267
  const extension = recorderMimeType.includes("mp4")
268
  ? "m4a"
269
  : recorderMimeType.includes("ogg")
270
  ? "ogg"
271
  : "webm";
272
+ const blob = new Blob(recordedChunks, { type: recorderMimeType });
273
  voiceRecorder = null;
274
  voiceChunks = [];
275
+ if (!blob.size) {
276
+ setVoiceRecordingState("idle");
277
+ submit.disabled = false;
278
+ setCommandDisabled(false);
279
+ setSessionStatus("Voice note is empty.");
280
+ return;
281
+ }
282
+ setVoiceRecordingState("transcribing");
283
  transcribeVoiceBlob(blob, `recorded-idea.${extension}`);
284
  });
285
  voiceRecorder.start();
286
+ setVoiceRecordingState("recording");
 
287
  setSessionStatus("Listening. Press Stop when your idea is ready.");
288
  } catch (error) {
289
  stopVoiceStream();
290
  voiceRecorder = null;
291
+ voiceChunks = [];
292
+ setVoiceRecordingState("idle");
293
+ submit.disabled = false;
294
+ setCommandDisabled(false);
295
  setSessionStatus(`Voice recording could not start: ${error.message}`);
296
  }
297
  }
298
 
299
+ function stopVoiceRecording() {
300
+ if (!voiceRecorder || voiceRecorder.state !== "recording") return;
301
+ setVoiceRecordingState("stopping");
302
+ setSessionStatus("Stopping recording.");
303
+ try {
304
+ voiceRecorder.stop();
305
+ } catch (error) {
306
+ stopVoiceStream();
307
+ voiceRecorder = null;
308
+ voiceChunks = [];
309
+ setVoiceRecordingState("idle");
310
+ submit.disabled = false;
311
+ setCommandDisabled(false);
312
+ setSessionStatus(`Voice recording could not stop: ${error.message}`);
313
+ }
314
+ }
315
+
316
  function recordingMimeType() {
317
  const candidates = ["audio/webm;codecs=opus", "audio/webm", "audio/ogg;codecs=opus", "audio/mp4"];
318
  return candidates.find((type) => MediaRecorder.isTypeSupported(type)) || "";
 
326
 
327
  async function transcribeVoiceBlob(blob, filename) {
328
  if (sessionControlsLocked || voiceBusy) return false;
329
+ if (voiceRecordingState !== "idle" && voiceRecordingState !== "transcribing") return false;
330
  if (!blob?.size) {
331
  setSessionStatus("Voice note is empty.");
332
  return false;
333
  }
334
  const revision = bumpSessionRevision();
335
  voiceBusy = true;
336
+ setVoiceRecordingState("transcribing");
337
  submit.disabled = true;
338
  input.disabled = true;
339
  setCommandDisabled(true);
340
  setSessionControlsDisabled(true);
 
 
341
  setSessionStatus("Transcribing voice note.");
342
  try {
343
  const formData = new FormData();
 
361
  return false;
362
  } finally {
363
  voiceBusy = false;
364
+ setVoiceRecordingState("idle");
365
  if (isCurrentSessionRevision(revision)) {
366
  submit.disabled = false;
367
  input.disabled = false;
368
  setSessionControlsDisabled(false);
369
  setCommandDisabled(false);
 
370
  input.focus();
371
  }
372
  }
 
515
  }
516
 
517
  function setVoiceControlsDisabled(disabled) {
518
+ const recording = voiceRecordingState === "recording" && voiceRecorder?.state === "recording";
519
+ const lockedForState = ["starting", "stopping", "transcribing"].includes(voiceRecordingState);
520
+ recordVoiceButton.disabled = !bootstrapData || voiceBusy || lockedForState || (disabled && !recording);
521
+ uploadVoiceButton.disabled = !bootstrapData || voiceBusy || disabled || voiceRecordingState !== "idle";
522
+ }
523
+
524
+ function setVoiceRecordingState(state) {
525
+ voiceRecordingState = state;
526
+ recordVoiceButton.dataset.voiceState = state;
527
+ recordVoiceButton.classList.toggle("recording", state === "recording");
528
+ recordVoiceButton.setAttribute("aria-pressed", state === "recording" ? "true" : "false");
529
+ const labels = {
530
+ idle: "Speak",
531
+ starting: "Starting...",
532
+ recording: "Stop",
533
+ stopping: "Stopping...",
534
+ transcribing: "Hearing...",
535
+ };
536
+ setActionButtonLabel(recordVoiceButton, labels[state] || "Speak");
537
+ setVoiceControlsDisabled(sessionControlsLocked);
538
  }
539
 
540
  function resetSession() {
static/styles.css CHANGED
@@ -364,6 +364,16 @@ textarea:disabled {
364
  border-color: #265935;
365
  }
366
 
 
 
 
 
 
 
 
 
 
 
367
  .btn-ghost {
368
  color: var(--ink-soft);
369
  background: transparent;
 
364
  border-color: #265935;
365
  }
366
 
367
+ .btn-voice.recording {
368
+ background: var(--oxblood);
369
+ border-color: var(--oxblood);
370
+ }
371
+
372
+ .btn-voice.recording:hover:not(:disabled) {
373
+ background: var(--oxblood-2);
374
+ border-color: var(--oxblood-2);
375
+ }
376
+
377
  .btn-ghost {
378
  color: var(--ink-soft);
379
  background: transparent;
tests/test_frontend_copy.py CHANGED
@@ -15,6 +15,10 @@ def test_main_interface_copy_is_builder_facing() -> None:
15
  assert "/api/agent-turn" in app_js
16
  assert "/api/transcribe" in app_js
17
  assert "MediaRecorder" in app_js
 
 
 
 
18
  assert "readNdjson" in app_js
19
  assert "@gradio/client" not in app_js
20
  assert "renderArtifactCanvas" not in app_js
 
15
  assert "/api/agent-turn" in app_js
16
  assert "/api/transcribe" in app_js
17
  assert "MediaRecorder" in app_js
18
+ assert "voiceRecordingState" in app_js
19
+ assert "stopVoiceRecording" in app_js
20
+ assert 'recording: "Stop"' in app_js
21
+ assert 'stopping: "Stopping..."' in app_js
22
  assert "readNdjson" in app_js
23
  assert "@gradio/client" not in app_js
24
  assert "renderArtifactCanvas" not in app_js