JacobLinCool Codex committed on
Commit
b03e3b9
·
verified ·
1 Parent(s): 7d1e08d

fix: accept uploaded wav voice notes

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (2) hide show
  1. app.py +12 -3
  2. tests/test_app.py +20 -0
app.py CHANGED
@@ -34,6 +34,7 @@ DATA_PATH = ROOT / "data" / "projects.json"
34
  INDEX_PATH = ROOT / "data" / "project_index.json"
35
  PROFILE_FIELDS = ["skills", "time", "preferences", "constraints"]
36
  MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
 
37
 
38
  index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
39
  engine = AdvisorEngine(index)
@@ -204,16 +205,24 @@ def agent_turn_stream(payload: dict[str, Any] | None = Body(default=None)) -> St
204
  @app.post("/api/transcribe")
205
  async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
206
  content_type = str(audio.content_type or "")
207
- if content_type and not content_type.startswith("audio/"):
 
 
208
  raise HTTPException(status_code=415, detail="Voice input must be an audio file.")
209
  with tempfile.TemporaryDirectory(prefix="advisor-upload-") as directory:
210
- filename = Path(str(audio.filename or "voice-note")).name
211
- suffix = Path(filename).suffix or ".audio"
212
  source = Path(directory) / f"voice{suffix}"
213
  await _save_audio_upload(audio, source)
214
  return _transcribe_voice(str(source))
215
 
216
 
 
 
 
 
 
 
 
 
217
  async def _save_audio_upload(upload: UploadFile, target: Path) -> None:
218
  total = 0
219
  with target.open("wb") as handle:
 
34
  INDEX_PATH = ROOT / "data" / "project_index.json"
35
  PROFILE_FIELDS = ["skills", "time", "preferences", "constraints"]
36
  MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
37
+ AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
38
 
39
  index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
40
  engine = AdvisorEngine(index)
 
205
  @app.post("/api/transcribe")
206
  async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
207
  content_type = str(audio.content_type or "")
208
+ filename = Path(str(audio.filename or "voice-note")).name
209
+ suffix = Path(filename).suffix.lower() or ".audio"
210
+ if not _is_audio_upload(content_type, suffix):
211
  raise HTTPException(status_code=415, detail="Voice input must be an audio file.")
212
  with tempfile.TemporaryDirectory(prefix="advisor-upload-") as directory:
 
 
213
  source = Path(directory) / f"voice{suffix}"
214
  await _save_audio_upload(audio, source)
215
  return _transcribe_voice(str(source))
216
 
217
 
218
+ def _is_audio_upload(content_type: str, suffix: str) -> bool:
219
+ if content_type.startswith("audio/"):
220
+ return True
221
+ if content_type in {"", "application/octet-stream"} and suffix in AUDIO_UPLOAD_SUFFIXES:
222
+ return True
223
+ return False
224
+
225
+
226
  async def _save_audio_upload(upload: UploadFile, target: Path) -> None:
227
  total = 0
228
  with target.open("wb") as handle:
tests/test_app.py CHANGED
@@ -123,6 +123,26 @@ def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
123
  assert captured["path"].endswith(".wav")
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def test_transcribe_audio_endpoint_rejects_non_audio() -> None:
127
  upload = DummyUpload(b"hello", filename="note.txt", content_type="text/plain")
128
 
 
123
  assert captured["path"].endswith(".wav")
124
 
125
 
126
def test_transcribe_audio_endpoint_accepts_octet_stream_audio(monkeypatch) -> None:
    """A ``.wav`` upload sent as ``application/octet-stream`` is transcribed."""

    def fake_transcribe(path):
        # Canned result standing in for the real speech model.
        return {
            "transcript": "A local-first memory archive.",
            "model_id": "nvidia/nemotron-speech-streaming-en-0.6b",
            "backend": "nemo-asr",
            "sample_rate": 16000,
        }

    monkeypatch.setattr("app._transcribe_voice", fake_transcribe)

    wav_upload = DummyUpload(b"RIFF....WAVE", filename="idea.wav", content_type="application/octet-stream")
    response = asyncio.run(transcribe_audio(wav_upload))

    assert response["transcript"] == "A local-first memory archive."
144
+
145
+
146
  def test_transcribe_audio_endpoint_rejects_non_audio() -> None:
147
  upload = DummyUpload(b"hello", filename="note.txt", content_type="text/plain")
148