Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -181,10 +181,14 @@ def get_model() -> WhisperModel:
|
|
| 181 |
|
| 182 |
|
| 183 |
def build_transcription_params(mode: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
params = {
|
| 185 |
"task": "transcribe",
|
| 186 |
-
"beam_size": 5,
|
| 187 |
-
"best_of": 5,
|
| 188 |
"temperature": 0.0,
|
| 189 |
}
|
| 190 |
|
|
@@ -196,6 +200,7 @@ def build_transcription_params(mode: str):
|
|
| 196 |
"Write in a formal clinical style."
|
| 197 |
)
|
| 198 |
else:
|
|
|
|
| 199 |
params["language"] = None
|
| 200 |
|
| 201 |
return params
|
|
@@ -375,17 +380,19 @@ HTTP API for Whisper (via faster-whisper) with:
|
|
| 375 |
- Multi-file audio upload (including .dct where supported by ffmpeg)
|
| 376 |
- Password-protected ZIP upload (default password: dietcoke1)
|
| 377 |
- Option to ONLY extract ZIP and list audio names (no transcription)
|
|
|
|
| 378 |
- Medical-biased transcription mode
|
| 379 |
- Persistent word/phrase memory (replacements)
|
| 380 |
- Extraction & saving of frequent 'medical terms' from transcripts
|
| 381 |
- Combined transcript + DOCX export
|
|
|
|
| 382 |
|
| 383 |
If a .dct file uses a proprietary codec that ffmpeg cannot decode,
|
| 384 |
you will get a clear error suggesting to convert to WAV/MP3 first.
|
| 385 |
|
| 386 |
Use `/docs` for Swagger UI and `/ui` for the web interface.
|
| 387 |
""",
|
| 388 |
-
version="2.
|
| 389 |
)
|
| 390 |
|
| 391 |
|
|
@@ -430,6 +437,7 @@ def self_test():
|
|
| 430 |
"memory_rules": num_rules,
|
| 431 |
"medical_terms_count": med_count,
|
| 432 |
"zip_default_password": DEFAULT_ZIP_PASSWORD,
|
|
|
|
| 433 |
}
|
| 434 |
)
|
| 435 |
except Exception as e:
|
|
@@ -587,7 +595,7 @@ def zip_extract_only(
|
|
| 587 |
)
|
| 588 |
|
| 589 |
|
| 590 |
-
# ---------- 4. ZIP transcription (JSON) ----------
|
| 591 |
|
| 592 |
@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
|
| 593 |
def transcribe_zip(
|
|
@@ -648,7 +656,101 @@ def transcribe_zip(
|
|
| 648 |
)
|
| 649 |
|
| 650 |
|
| 651 |
-
# ---------- 5. ZIP transcription (DOCX) ----------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 652 |
|
| 653 |
@app.post("/api/transcribe/zip/docx")
|
| 654 |
def transcribe_zip_docx(
|
|
@@ -708,11 +810,6 @@ def transcribe_zip_docx(
|
|
| 708 |
|
| 709 |
# ===================== MEMORY ENDPOINTS =====================
|
| 710 |
|
| 711 |
-
class MemoryRule(BaseModel):
|
| 712 |
-
source: str
|
| 713 |
-
target: str
|
| 714 |
-
|
| 715 |
-
|
| 716 |
@app.get("/api/memory", response_model=MemoryResponse)
|
| 717 |
def get_memory():
|
| 718 |
mem = load_memory()
|
|
@@ -998,7 +1095,7 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 998 |
<h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
|
| 999 |
<p>
|
| 1000 |
Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
|
| 1001 |
-
and memory of preferred terms + collected medical vocabulary.
|
| 1002 |
Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
|
| 1003 |
</p>
|
| 1004 |
</header>
|
|
@@ -1106,13 +1203,19 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1106 |
|
| 1107 |
<div class="btn-row">
|
| 1108 |
<button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
|
| 1109 |
-
<button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
|
| 1110 |
-
<button class="btn-secondary" id="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1111 |
</div>
|
| 1112 |
</div>
|
| 1113 |
<div class="col">
|
| 1114 |
<h3>ZIP combined transcript</h3>
|
| 1115 |
-
<textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON
|
| 1116 |
</div>
|
| 1117 |
</div>
|
| 1118 |
</div>
|
|
@@ -1179,6 +1282,15 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1179 |
-F "mode=medical_en" \
|
| 1180 |
-F "extract_terms=true" \
|
| 1181 |
-F "files=@path/to/audio1.flac"</code></pre>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1182 |
</div>
|
| 1183 |
</div>
|
| 1184 |
</main>
|
|
@@ -1424,6 +1536,7 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1424 |
document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
|
| 1425 |
const zipInput = document.getElementById("zip_input");
|
| 1426 |
const pwd = document.getElementById("zip_password").value || "";
|
|
|
|
| 1427 |
|
| 1428 |
if (!zipInput.files.length) {
|
| 1429 |
alert("Please choose a ZIP file.");
|
|
@@ -1440,8 +1553,29 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1440 |
try {
|
| 1441 |
const data = await postForm("/api/zip/extract", formData, false);
|
| 1442 |
const count = data.count || (data.audio_files ? data.audio_files.length : 0);
|
| 1443 |
-
const names =
|
| 1444 |
-
setStatus("Extracted " + count + " audio file(s) from ZIP
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1445 |
finishProgress("ZIP extraction complete");
|
| 1446 |
} catch (err) {
|
| 1447 |
console.error(err);
|
|
@@ -1450,7 +1584,7 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1450 |
}
|
| 1451 |
});
|
| 1452 |
|
| 1453 |
-
// ZIP JSON
|
| 1454 |
document.getElementById("btn_zip_json").addEventListener("click", async () => {
|
| 1455 |
const zipInput = document.getElementById("zip_input");
|
| 1456 |
const pwd = document.getElementById("zip_password").value || "";
|
|
@@ -1491,7 +1625,57 @@ HTML_UI = r"""<!DOCTYPE html>
|
|
| 1491 |
}
|
| 1492 |
});
|
| 1493 |
|
| 1494 |
-
// ZIP DOCX
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1495 |
document.getElementById("btn_zip_docx").addEventListener("click", async () => {
|
| 1496 |
const zipInput = document.getElementById("zip_input");
|
| 1497 |
const pwd = document.getElementById("zip_password").value || "";
|
|
|
|
| 181 |
|
| 182 |
|
| 183 |
def build_transcription_params(mode: str):
|
| 184 |
+
"""
|
| 185 |
+
Fast, CPU-friendly settings:
|
| 186 |
+
- greedy decoding (beam_size=1, best_of=1)
|
| 187 |
+
"""
|
| 188 |
params = {
|
| 189 |
"task": "transcribe",
|
| 190 |
+
"beam_size": 1, # was 5 → faster
|
| 191 |
+
"best_of": 1, # was 5 → faster
|
| 192 |
"temperature": 0.0,
|
| 193 |
}
|
| 194 |
|
|
|
|
| 200 |
"Write in a formal clinical style."
|
| 201 |
)
|
| 202 |
else:
|
| 203 |
+
# leave language autodetect for general mode
|
| 204 |
params["language"] = None
|
| 205 |
|
| 206 |
return params
|
|
|
|
| 380 |
- Multi-file audio upload (including .dct where supported by ffmpeg)
|
| 381 |
- Password-protected ZIP upload (default password: dietcoke1)
|
| 382 |
- Option to ONLY extract ZIP and list audio names (no transcription)
|
| 383 |
+
- NEW: ZIP → choose selected files to transcribe
|
| 384 |
- Medical-biased transcription mode
|
| 385 |
- Persistent word/phrase memory (replacements)
|
| 386 |
- Extraction & saving of frequent 'medical terms' from transcripts
|
| 387 |
- Combined transcript + DOCX export
|
| 388 |
+
- Fast greedy decoding for CPU (beam_size=1, best_of=1)
|
| 389 |
|
| 390 |
If a .dct file uses a proprietary codec that ffmpeg cannot decode,
|
| 391 |
you will get a clear error suggesting to convert to WAV/MP3 first.
|
| 392 |
|
| 393 |
Use `/docs` for Swagger UI and `/ui` for the web interface.
|
| 394 |
""",
|
| 395 |
+
version="2.5.0",
|
| 396 |
)
|
| 397 |
|
| 398 |
|
|
|
|
| 437 |
"memory_rules": num_rules,
|
| 438 |
"medical_terms_count": med_count,
|
| 439 |
"zip_default_password": DEFAULT_ZIP_PASSWORD,
|
| 440 |
+
"decoding": "fast (beam_size=1, best_of=1)",
|
| 441 |
}
|
| 442 |
)
|
| 443 |
except Exception as e:
|
|
|
|
| 595 |
)
|
| 596 |
|
| 597 |
|
| 598 |
+
# ---------- 4. ZIP transcription (JSON) – ALL FILES ----------
|
| 599 |
|
| 600 |
@app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
|
| 601 |
def transcribe_zip(
|
|
|
|
| 656 |
)
|
| 657 |
|
| 658 |
|
| 659 |
+
# ---------- 5. ZIP transcription (JSON) – SELECTED FILES ONLY ----------
|
| 660 |
+
|
| 661 |
+
@app.post("/api/transcribe/zip/selected", response_model=TranscriptionResponse)
def transcribe_zip_selected(
    file: UploadFile = File(..., description="ZIP file containing audio files"),
    password: str = Form(
        "",
        description="ZIP password. Leave blank to use default 'dietcoke1'.",
    ),
    selected_files: str = Form(
        "",
        description="Comma-separated file names (inside ZIP) to transcribe",
    ),
    mode: Literal["general", "medical_en"] = Form("medical_en"),
    extract_terms: bool = Form(False),
):
    """
    Extract a ZIP upload and transcribe only the audio files named in
    'selected_files'.

    Parameters:
        file: the uploaded ZIP archive.
        password: ZIP password; an empty value falls back to
            DEFAULT_ZIP_PASSWORD.
        selected_files: comma-separated names of the files to transcribe.
            Each entry is matched against the basename of the extracted
            audio files. An entry may be a bare file name or carry a
            directory prefix (POSIX or Windows-style separators); in the
            latter case its final path component is used for matching.
        mode: transcription mode passed through to transcribe_file.
        extract_terms: when true and the combined transcript is non-empty,
            the transcript is passed to update_med_terms_from_text and the
            result is returned as 'new_medical_terms'.

    Returns:
        TranscriptionResponse with one item per matched file, ordered
        alphabetically by basename.

    Raises:
        HTTPException(400): no ZIP uploaded, no names selected, no audio
            files in the ZIP, none of the selected names matched, or the
            transcription raised a RuntimeError mentioning
            "Audio decoder could not read file".
        HTTPException(500): any other RuntimeError during transcription.
    """
    if file is None:
        raise HTTPException(status_code=400, detail="No ZIP uploaded.")

    effective_password = password if password else DEFAULT_ZIP_PASSWORD

    # Collect both the raw entry and its basename. Keeping the raw entry
    # guarantees every name that matched before still matches; adding the
    # basename lets path-qualified entries ("dir/a.wav") match as well,
    # because the lookup below is keyed on basenames only.
    selected_set = set()
    for raw_name in (selected_files or "").split(","):
        name = raw_name.strip()
        if not name:
            continue
        selected_set.add(name)
        selected_set.add(os.path.basename(name.replace("\\", "/")))
    # An entry ending in a separator has an empty basename; never match on it.
    selected_set.discard("")

    if not selected_set:
        raise HTTPException(
            status_code=400,
            detail="No selected_files provided. Please choose at least one file from the ZIP.",
        )

    extracted_paths = extract_zip_to_temp(file, effective_password)
    audio_paths = filter_audio_files(extracted_paths)

    if not audio_paths:
        raise HTTPException(
            status_code=400,
            detail=(
                "No valid audio files in ZIP. "
                f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
            ),
        )

    # Map basename -> extracted path for the selected files only.
    # NOTE(review): if the ZIP holds two audio files with the same basename
    # in different folders, the later one wins — confirm this is acceptable.
    name_to_path = {}
    for p in audio_paths:
        base = os.path.basename(p)
        if base in selected_set:
            name_to_path[base] = p

    if not name_to_path:
        raise HTTPException(
            status_code=400,
            detail="None of the selected_files were found as audio inside the ZIP.",
        )

    items: List[FileTranscript] = []
    try:
        # Files are processed in alphabetical order of basename (not in the
        # order the caller listed them), which keeps the output deterministic.
        for fname in sorted(name_to_path):
            text = transcribe_file(name_to_path[fname], mode)
            items.append(FileTranscript(filename=fname, text=text))
    except RuntimeError as e:
        msg = str(e)
        # Undecodable audio is reported as a client error (400); any other
        # RuntimeError is reported as a server failure (500).
        if "Audio decoder could not read file" in msg:
            raise HTTPException(status_code=400, detail=msg) from e
        raise HTTPException(
            status_code=500,
            detail=f"Transcription failed (ZIP selected): {msg}",
        ) from e

    combined = format_combined(items)
    filenames = [it.filename for it in items]

    new_terms: List[str] = []
    if extract_terms and combined:
        new_terms = update_med_terms_from_text(combined)

    return TranscriptionResponse(
        mode=mode,
        combined_transcript=combined,
        items=items,
        file_count=len(items),
        audio_files=filenames,
        new_medical_terms=new_terms,
    )
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
# ---------- 6. ZIP transcription (DOCX) – ALL FILES ----------
|
| 754 |
|
| 755 |
@app.post("/api/transcribe/zip/docx")
|
| 756 |
def transcribe_zip_docx(
|
|
|
|
| 810 |
|
| 811 |
# ===================== MEMORY ENDPOINTS =====================
|
| 812 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
@app.get("/api/memory", response_model=MemoryResponse)
|
| 814 |
def get_memory():
|
| 815 |
mem = load_memory()
|
|
|
|
| 1095 |
<h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
|
| 1096 |
<p>
|
| 1097 |
Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
|
| 1098 |
+
selectable ZIP files for transcription, and memory of preferred terms + collected medical vocabulary.
|
| 1099 |
Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
|
| 1100 |
</p>
|
| 1101 |
</header>
|
|
|
|
| 1203 |
|
| 1204 |
<div class="btn-row">
|
| 1205 |
<button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
|
| 1206 |
+
<button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON (all files)</button>
|
| 1207 |
+
<button class="btn-secondary" id="btn_zip_selected">Transcribe selected from ZIP → JSON</button>
|
| 1208 |
+
<button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX (all files)</button>
|
| 1209 |
+
</div>
|
| 1210 |
+
|
| 1211 |
+
<h3>Files inside ZIP (select to transcribe)</h3>
|
| 1212 |
+
<div id="zip_file_list" class="small-hint">
|
| 1213 |
+
Run "Extract only & list audio files" to see files and choose which ones to transcribe.
|
| 1214 |
</div>
|
| 1215 |
</div>
|
| 1216 |
<div class="col">
|
| 1217 |
<h3>ZIP combined transcript</h3>
|
| 1218 |
+
<textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON buttons."></textarea>
|
| 1219 |
</div>
|
| 1220 |
</div>
|
| 1221 |
</div>
|
|
|
|
| 1282 |
-F "mode=medical_en" \
|
| 1283 |
-F "extract_terms=true" \
|
| 1284 |
-F "files=@path/to/audio1.flac"</code></pre>
|
| 1285 |
+
|
| 1286 |
+
<h3>ZIP selected files JSON</h3>
|
| 1287 |
+
<pre><code>curl -X POST \
|
| 1288 |
+
"https://staraks-whisper-large-v3.hf.space/api/transcribe/zip/selected" \
|
| 1289 |
+
-H "Accept: application/json" \
|
| 1290 |
+
-F "mode=medical_en" \
|
| 1291 |
+
-F "extract_terms=true" \
|
| 1292 |
+
-F "selected_files=file1.wav,file3.dct" \
|
| 1293 |
+
-F "file=@path/to/archive.zip"</code></pre>
|
| 1294 |
</div>
|
| 1295 |
</div>
|
| 1296 |
</main>
|
|
|
|
| 1536 |
document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
|
| 1537 |
const zipInput = document.getElementById("zip_input");
|
| 1538 |
const pwd = document.getElementById("zip_password").value || "";
|
| 1539 |
+
const listDiv = document.getElementById("zip_file_list");
|
| 1540 |
|
| 1541 |
if (!zipInput.files.length) {
|
| 1542 |
alert("Please choose a ZIP file.");
|
|
|
|
| 1553 |
try {
|
| 1554 |
const data = await postForm("/api/zip/extract", formData, false);
|
| 1555 |
const count = data.count || (data.audio_files ? data.audio_files.length : 0);
|
| 1556 |
+
const names = data.audio_files || [];
|
| 1557 |
+
setStatus("Extracted " + count + " audio file(s) from ZIP.");
|
| 1558 |
+
|
| 1559 |
+
// populate selectable list
|
| 1560 |
+
if (names.length) {
|
| 1561 |
+
listDiv.innerHTML = "";
|
| 1562 |
+
names.forEach(name => {
|
| 1563 |
+
const safeId = "zip_choice_" + name.replace(/[^a-zA-Z0-9_\-]/g, "_");
|
| 1564 |
+
const label = document.createElement("label");
|
| 1565 |
+
label.style.display = "block";
|
| 1566 |
+
const cb = document.createElement("input");
|
| 1567 |
+
cb.type = "checkbox";
|
| 1568 |
+
cb.value = name;
|
| 1569 |
+
cb.id = safeId;
|
| 1570 |
+
cb.checked = true;
|
| 1571 |
+
label.appendChild(cb);
|
| 1572 |
+
label.append(" " + name);
|
| 1573 |
+
listDiv.appendChild(label);
|
| 1574 |
+
});
|
| 1575 |
+
} else {
|
| 1576 |
+
listDiv.innerHTML = "No audio files found in ZIP.";
|
| 1577 |
+
}
|
| 1578 |
+
|
| 1579 |
finishProgress("ZIP extraction complete");
|
| 1580 |
} catch (err) {
|
| 1581 |
console.error(err);
|
|
|
|
| 1584 |
}
|
| 1585 |
});
|
| 1586 |
|
| 1587 |
+
// ZIP JSON – ALL FILES
|
| 1588 |
document.getElementById("btn_zip_json").addEventListener("click", async () => {
|
| 1589 |
const zipInput = document.getElementById("zip_input");
|
| 1590 |
const pwd = document.getElementById("zip_password").value || "";
|
|
|
|
| 1625 |
}
|
| 1626 |
});
|
| 1627 |
|
| 1628 |
+
// ZIP JSON – SELECTED FILES ONLY
|
| 1629 |
+
document.getElementById("btn_zip_selected").addEventListener("click", async () => {
|
| 1630 |
+
const zipInput = document.getElementById("zip_input");
|
| 1631 |
+
const pwd = document.getElementById("zip_password").value || "";
|
| 1632 |
+
const mode = document.getElementById("zip_mode").value;
|
| 1633 |
+
const extractTerms = document.getElementById("zip_extract_terms").checked;
|
| 1634 |
+
const out = document.getElementById("zip_output");
|
| 1635 |
+
const listDiv = document.getElementById("zip_file_list");
|
| 1636 |
+
|
| 1637 |
+
if (!zipInput.files.length) {
|
| 1638 |
+
alert("Please choose a ZIP file.");
|
| 1639 |
+
return;
|
| 1640 |
+
}
|
| 1641 |
+
|
| 1642 |
+
const checkboxes = listDiv.querySelectorAll("input[type='checkbox']:checked");
|
| 1643 |
+
if (!checkboxes.length) {
|
| 1644 |
+
alert("Please select at least one file from the ZIP (use the checkboxes).");
|
| 1645 |
+
return;
|
| 1646 |
+
}
|
| 1647 |
+
const names = Array.from(checkboxes).map(cb => cb.value);
|
| 1648 |
+
|
| 1649 |
+
const formData = new FormData();
|
| 1650 |
+
formData.append("file", zipInput.files[0]);
|
| 1651 |
+
formData.append("password", pwd);
|
| 1652 |
+
formData.append("mode", mode);
|
| 1653 |
+
formData.append("extract_terms", extractTerms ? "true" : "false");
|
| 1654 |
+
formData.append("selected_files", names.join(","));
|
| 1655 |
+
|
| 1656 |
+
setStatus("Uploading ZIP and transcribing selected files only…");
|
| 1657 |
+
out.value = "";
|
| 1658 |
+
startSimulatedProgress("Uploading & transcribing selected ZIP files");
|
| 1659 |
+
|
| 1660 |
+
try {
|
| 1661 |
+
const data = await postForm("/api/transcribe/zip/selected", formData, false);
|
| 1662 |
+
out.value = data.combined_transcript || "";
|
| 1663 |
+
const count = data.file_count || (data.items ? data.items.length : 0);
|
| 1664 |
+
const transcribedNames = (data.audio_files || []).join(", ");
|
| 1665 |
+
let extra = "";
|
| 1666 |
+
if (data.new_medical_terms && data.new_medical_terms.length) {
|
| 1667 |
+
extra = " New medical terms added: " + data.new_medical_terms.join(", ");
|
| 1668 |
+
}
|
| 1669 |
+
setStatus("Done. Transcribed " + count + " selected file(s) from ZIP: " + transcribedNames + extra);
|
| 1670 |
+
finishProgress("Selected ZIP transcription complete");
|
| 1671 |
+
} catch (err) {
|
| 1672 |
+
console.error(err);
|
| 1673 |
+
alert(err.message);
|
| 1674 |
+
errorProgress(err.message || "Error during selected ZIP transcription.");
|
| 1675 |
+
}
|
| 1676 |
+
});
|
| 1677 |
+
|
| 1678 |
+
// ZIP DOCX – ALL FILES
|
| 1679 |
document.getElementById("btn_zip_docx").addEventListener("click", async () => {
|
| 1680 |
const zipInput = document.getElementById("zip_input");
|
| 1681 |
const pwd = document.getElementById("zip_password").value || "";
|