staraks committed on
Commit
deba159
·
verified ·
1 Parent(s): 4236885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +202 -18
app.py CHANGED
@@ -181,10 +181,14 @@ def get_model() -> WhisperModel:
181
 
182
 
183
  def build_transcription_params(mode: str):
 
 
 
 
184
  params = {
185
  "task": "transcribe",
186
- "beam_size": 5,
187
- "best_of": 5,
188
  "temperature": 0.0,
189
  }
190
 
@@ -196,6 +200,7 @@ def build_transcription_params(mode: str):
196
  "Write in a formal clinical style."
197
  )
198
  else:
 
199
  params["language"] = None
200
 
201
  return params
@@ -375,17 +380,19 @@ HTTP API for Whisper (via faster-whisper) with:
375
  - Multi-file audio upload (including .dct where supported by ffmpeg)
376
  - Password-protected ZIP upload (default password: dietcoke1)
377
  - Option to ONLY extract ZIP and list audio names (no transcription)
 
378
  - Medical-biased transcription mode
379
  - Persistent word/phrase memory (replacements)
380
  - Extraction & saving of frequent 'medical terms' from transcripts
381
  - Combined transcript + DOCX export
 
382
 
383
  If a .dct file uses a proprietary codec that ffmpeg cannot decode,
384
  you will get a clear error suggesting to convert to WAV/MP3 first.
385
 
386
  Use `/docs` for Swagger UI and `/ui` for the web interface.
387
  """,
388
- version="2.4.0",
389
  )
390
 
391
 
@@ -430,6 +437,7 @@ def self_test():
430
  "memory_rules": num_rules,
431
  "medical_terms_count": med_count,
432
  "zip_default_password": DEFAULT_ZIP_PASSWORD,
 
433
  }
434
  )
435
  except Exception as e:
@@ -587,7 +595,7 @@ def zip_extract_only(
587
  )
588
 
589
 
590
- # ---------- 4. ZIP transcription (JSON) ----------
591
 
592
  @app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
593
  def transcribe_zip(
@@ -648,7 +656,101 @@ def transcribe_zip(
648
  )
649
 
650
 
651
- # ---------- 5. ZIP transcription (DOCX) ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
652
 
653
  @app.post("/api/transcribe/zip/docx")
654
  def transcribe_zip_docx(
@@ -708,11 +810,6 @@ def transcribe_zip_docx(
708
 
709
  # ===================== MEMORY ENDPOINTS =====================
710
 
711
- class MemoryRule(BaseModel):
712
- source: str
713
- target: str
714
-
715
-
716
  @app.get("/api/memory", response_model=MemoryResponse)
717
  def get_memory():
718
  mem = load_memory()
@@ -998,7 +1095,7 @@ HTML_UI = r"""<!DOCTYPE html>
998
  <h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
999
  <p>
1000
  Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
1001
- and memory of preferred terms + collected medical vocabulary.
1002
  Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
1003
  </p>
1004
  </header>
@@ -1106,13 +1203,19 @@ HTML_UI = r"""<!DOCTYPE html>
1106
 
1107
  <div class="btn-row">
1108
  <button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
1109
- <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
1110
- <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
 
 
 
 
 
 
1111
  </div>
1112
  </div>
1113
  <div class="col">
1114
  <h3>ZIP combined transcript</h3>
1115
- <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
1116
  </div>
1117
  </div>
1118
  </div>
@@ -1179,6 +1282,15 @@ HTML_UI = r"""<!DOCTYPE html>
1179
  -F "mode=medical_en" \
1180
  -F "extract_terms=true" \
1181
  -F "files=@path/to/audio1.flac"</code></pre>
 
 
 
 
 
 
 
 
 
1182
  </div>
1183
  </div>
1184
  </main>
@@ -1424,6 +1536,7 @@ HTML_UI = r"""<!DOCTYPE html>
1424
  document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
1425
  const zipInput = document.getElementById("zip_input");
1426
  const pwd = document.getElementById("zip_password").value || "";
 
1427
 
1428
  if (!zipInput.files.length) {
1429
  alert("Please choose a ZIP file.");
@@ -1440,8 +1553,29 @@ HTML_UI = r"""<!DOCTYPE html>
1440
  try {
1441
  const data = await postForm("/api/zip/extract", formData, false);
1442
  const count = data.count || (data.audio_files ? data.audio_files.length : 0);
1443
- const names = (data.audio_files || []).join(", ");
1444
- setStatus("Extracted " + count + " audio file(s) from ZIP: " + names);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1445
  finishProgress("ZIP extraction complete");
1446
  } catch (err) {
1447
  console.error(err);
@@ -1450,7 +1584,7 @@ HTML_UI = r"""<!DOCTYPE html>
1450
  }
1451
  });
1452
 
1453
- // ZIP JSON
1454
  document.getElementById("btn_zip_json").addEventListener("click", async () => {
1455
  const zipInput = document.getElementById("zip_input");
1456
  const pwd = document.getElementById("zip_password").value || "";
@@ -1491,7 +1625,57 @@ HTML_UI = r"""<!DOCTYPE html>
1491
  }
1492
  });
1493
 
1494
- // ZIP DOCX
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1495
  document.getElementById("btn_zip_docx").addEventListener("click", async () => {
1496
  const zipInput = document.getElementById("zip_input");
1497
  const pwd = document.getElementById("zip_password").value || "";
 
181
 
182
 
183
  def build_transcription_params(mode: str):
184
+ """
185
+ Fast, CPU-friendly settings:
186
+ - greedy decoding (beam_size=1, best_of=1)
187
+ """
188
  params = {
189
  "task": "transcribe",
190
+ "beam_size": 1, # was 5 → faster
191
+ "best_of": 1, # was 5 → faster
192
  "temperature": 0.0,
193
  }
194
 
 
200
  "Write in a formal clinical style."
201
  )
202
  else:
203
+ # general mode: leave the language unset (None) so it is auto-detected
204
  params["language"] = None
205
 
206
  return params
 
380
  - Multi-file audio upload (including .dct where supported by ffmpeg)
381
  - Password-protected ZIP upload (default password: dietcoke1)
382
  - Option to ONLY extract ZIP and list audio names (no transcription)
383
+ - NEW: ZIP → choose selected files to transcribe
384
  - Medical-biased transcription mode
385
  - Persistent word/phrase memory (replacements)
386
  - Extraction & saving of frequent 'medical terms' from transcripts
387
  - Combined transcript + DOCX export
388
+ - Fast greedy decoding for CPU (beam_size=1, best_of=1)
389
 
390
  If a .dct file uses a proprietary codec that ffmpeg cannot decode,
391
  you will get a clear error suggesting to convert to WAV/MP3 first.
392
 
393
  Use `/docs` for Swagger UI and `/ui` for the web interface.
394
  """,
395
+ version="2.5.0",
396
  )
397
 
398
 
 
437
  "memory_rules": num_rules,
438
  "medical_terms_count": med_count,
439
  "zip_default_password": DEFAULT_ZIP_PASSWORD,
440
+ "decoding": "fast (beam_size=1, best_of=1)",
441
  }
442
  )
443
  except Exception as e:
 
595
  )
596
 
597
 
598
+ # ---------- 4. ZIP transcription (JSON) – ALL FILES ----------
599
 
600
  @app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
601
  def transcribe_zip(
 
656
  )
657
 
658
 
659
+ # ---------- 5. ZIP transcription (JSON) – SELECTED FILES ONLY ----------
660
+
661
+ @app.post("/api/transcribe/zip/selected", response_model=TranscriptionResponse)
662
+ def transcribe_zip_selected(
663
+ file: UploadFile = File(..., description="ZIP file containing audio files"),
664
+ password: str = Form(
665
+ "",
666
+ description="ZIP password. Leave blank to use default 'dietcoke1'.",
667
+ ),
668
+ selected_files: str = Form(
669
+ "",
670
+ description="Comma-separated file names (inside ZIP) to transcribe",
671
+ ),
672
+ mode: Literal["general", "medical_en"] = Form("medical_en"),
673
+ extract_terms: bool = Form(False),
674
+ ):
675
+ """
676
+ Extract ZIP, then ONLY transcribe the subset of files whose basenames are
677
+ passed in 'selected_files' (comma-separated).
678
+ """
679
+ if file is None:
680
+ raise HTTPException(status_code=400, detail="No ZIP uploaded.")
681
+
682
+ effective_password = password if password else DEFAULT_ZIP_PASSWORD
683
+
684
+ selected_set = {
685
+ name.strip()
686
+ for name in (selected_files or "").split(",")
687
+ if name.strip()
688
+ }
689
+ if not selected_set:
690
+ raise HTTPException(
691
+ status_code=400,
692
+ detail="No selected_files provided. Please choose at least one file from the ZIP.",
693
+ )
694
+
695
+ extracted_paths = extract_zip_to_temp(file, effective_password)
696
+ audio_paths = filter_audio_files(extracted_paths)
697
+
698
+ if not audio_paths:
699
+ raise HTTPException(
700
+ status_code=400,
701
+ detail=(
702
+ "No valid audio files in ZIP. "
703
+ f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
704
+ ),
705
+ )
706
+
707
+ # Map names -> paths for quick lookup
708
+ name_to_path = {}
709
+ for p in audio_paths:
710
+ base = os.path.basename(p)
711
+ if base in selected_set:
712
+ name_to_path[base] = p
713
+
714
+ if not name_to_path:
715
+ raise HTTPException(
716
+ status_code=400,
717
+ detail="None of the selected_files were found as audio inside the ZIP.",
718
+ )
719
+
720
+ items: List[FileTranscript] = []
721
+ try:
722
+ # transcribe the matched files in alphabetical order of file name (the user's selection order is not preserved)
723
+ for fname in sorted(name_to_path.keys()):
724
+ path = name_to_path[fname]
725
+ text = transcribe_file(path, mode)
726
+ items.append(FileTranscript(filename=fname, text=text))
727
+ except RuntimeError as e:
728
+ msg = str(e)
729
+ if "Audio decoder could not read file" in msg:
730
+ raise HTTPException(status_code=400, detail=msg) from e
731
+ raise HTTPException(
732
+ status_code=500,
733
+ detail=f"Transcription failed (ZIP selected): {msg}",
734
+ ) from e
735
+
736
+ combined = format_combined(items)
737
+ filenames = [it.filename for it in items]
738
+
739
+ new_terms: List[str] = []
740
+ if extract_terms and combined:
741
+ new_terms = update_med_terms_from_text(combined)
742
+
743
+ return TranscriptionResponse(
744
+ mode=mode,
745
+ combined_transcript=combined,
746
+ items=items,
747
+ file_count=len(items),
748
+ audio_files=filenames,
749
+ new_medical_terms=new_terms,
750
+ )
751
+
752
+
753
+ # ---------- 6. ZIP transcription (DOCX) – ALL FILES ----------
754
 
755
  @app.post("/api/transcribe/zip/docx")
756
  def transcribe_zip_docx(
 
810
 
811
  # ===================== MEMORY ENDPOINTS =====================
812
 
 
 
 
 
 
813
  @app.get("/api/memory", response_model=MemoryResponse)
814
  def get_memory():
815
  mem = load_memory()
 
1095
  <h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
1096
  <p>
1097
  Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
1098
+ selectable ZIP files for transcription, and memory of preferred terms + collected medical vocabulary.
1099
  Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
1100
  </p>
1101
  </header>
 
1203
 
1204
  <div class="btn-row">
1205
  <button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
1206
+ <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON (all files)</button>
1207
+ <button class="btn-secondary" id="btn_zip_selected">Transcribe selected from ZIP → JSON</button>
1208
+ <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX (all files)</button>
1209
+ </div>
1210
+
1211
+ <h3>Files inside ZIP (select to transcribe)</h3>
1212
+ <div id="zip_file_list" class="small-hint">
1213
+ Run "Extract only & list audio files" to see files and choose which ones to transcribe.
1214
  </div>
1215
  </div>
1216
  <div class="col">
1217
  <h3>ZIP combined transcript</h3>
1218
+ <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON buttons."></textarea>
1219
  </div>
1220
  </div>
1221
  </div>
 
1282
  -F "mode=medical_en" \
1283
  -F "extract_terms=true" \
1284
  -F "files=@path/to/audio1.flac"</code></pre>
1285
+
1286
+ <h3>ZIP selected files JSON</h3>
1287
+ <pre><code>curl -X POST \
1288
+ "https://staraks-whisper-large-v3.hf.space/api/transcribe/zip/selected" \
1289
+ -H "Accept: application/json" \
1290
+ -F "mode=medical_en" \
1291
+ -F "extract_terms=true" \
1292
+ -F "selected_files=file1.wav,file3.dct" \
1293
+ -F "file=@path/to/archive.zip"</code></pre>
1294
  </div>
1295
  </div>
1296
  </main>
 
1536
  document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
1537
  const zipInput = document.getElementById("zip_input");
1538
  const pwd = document.getElementById("zip_password").value || "";
1539
+ const listDiv = document.getElementById("zip_file_list");
1540
 
1541
  if (!zipInput.files.length) {
1542
  alert("Please choose a ZIP file.");
 
1553
  try {
1554
  const data = await postForm("/api/zip/extract", formData, false);
1555
  const count = data.count || (data.audio_files ? data.audio_files.length : 0);
1556
+ const names = data.audio_files || [];
1557
+ setStatus("Extracted " + count + " audio file(s) from ZIP.");
1558
+
1559
+ // populate selectable list
1560
+ if (names.length) {
1561
+ listDiv.innerHTML = "";
1562
+ names.forEach(name => {
1563
+ const safeId = "zip_choice_" + name.replace(/[^a-zA-Z0-9_\-]/g, "_");
1564
+ const label = document.createElement("label");
1565
+ label.style.display = "block";
1566
+ const cb = document.createElement("input");
1567
+ cb.type = "checkbox";
1568
+ cb.value = name;
1569
+ cb.id = safeId;
1570
+ cb.checked = true;
1571
+ label.appendChild(cb);
1572
+ label.append(" " + name);
1573
+ listDiv.appendChild(label);
1574
+ });
1575
+ } else {
1576
+ listDiv.innerHTML = "No audio files found in ZIP.";
1577
+ }
1578
+
1579
  finishProgress("ZIP extraction complete");
1580
  } catch (err) {
1581
  console.error(err);
 
1584
  }
1585
  });
1586
 
1587
+ // ZIP JSON – ALL FILES
1588
  document.getElementById("btn_zip_json").addEventListener("click", async () => {
1589
  const zipInput = document.getElementById("zip_input");
1590
  const pwd = document.getElementById("zip_password").value || "";
 
1625
  }
1626
  });
1627
 
1628
+ // ZIP JSON – SELECTED FILES ONLY
1629
+ document.getElementById("btn_zip_selected").addEventListener("click", async () => {
1630
+ const zipInput = document.getElementById("zip_input");
1631
+ const pwd = document.getElementById("zip_password").value || "";
1632
+ const mode = document.getElementById("zip_mode").value;
1633
+ const extractTerms = document.getElementById("zip_extract_terms").checked;
1634
+ const out = document.getElementById("zip_output");
1635
+ const listDiv = document.getElementById("zip_file_list");
1636
+
1637
+ if (!zipInput.files.length) {
1638
+ alert("Please choose a ZIP file.");
1639
+ return;
1640
+ }
1641
+
1642
+ const checkboxes = listDiv.querySelectorAll("input[type='checkbox']:checked");
1643
+ if (!checkboxes.length) {
1644
+ alert("Please select at least one file from the ZIP (use the checkboxes).");
1645
+ return;
1646
+ }
1647
+ const names = Array.from(checkboxes).map(cb => cb.value);
1648
+
1649
+ const formData = new FormData();
1650
+ formData.append("file", zipInput.files[0]);
1651
+ formData.append("password", pwd);
1652
+ formData.append("mode", mode);
1653
+ formData.append("extract_terms", extractTerms ? "true" : "false");
1654
+ formData.append("selected_files", names.join(","));
1655
+
1656
+ setStatus("Uploading ZIP and transcribing selected files only…");
1657
+ out.value = "";
1658
+ startSimulatedProgress("Uploading & transcribing selected ZIP files");
1659
+
1660
+ try {
1661
+ const data = await postForm("/api/transcribe/zip/selected", formData, false);
1662
+ out.value = data.combined_transcript || "";
1663
+ const count = data.file_count || (data.items ? data.items.length : 0);
1664
+ const transcribedNames = (data.audio_files || []).join(", ");
1665
+ let extra = "";
1666
+ if (data.new_medical_terms && data.new_medical_terms.length) {
1667
+ extra = " New medical terms added: " + data.new_medical_terms.join(", ");
1668
+ }
1669
+ setStatus("Done. Transcribed " + count + " selected file(s) from ZIP: " + transcribedNames + extra);
1670
+ finishProgress("Selected ZIP transcription complete");
1671
+ } catch (err) {
1672
+ console.error(err);
1673
+ alert(err.message);
1674
+ errorProgress(err.message || "Error during selected ZIP transcription.");
1675
+ }
1676
+ });
1677
+
1678
+ // ZIP DOCX – ALL FILES
1679
  document.getElementById("btn_zip_docx").addEventListener("click", async () => {
1680
  const zipInput = document.getElementById("zip_input");
1681
  const pwd = document.getElementById("zip_password").value || "";