whisper-large-v3

Sleeping

App Files Files Community

staraks committed on Nov 20, 2025

Commit

deba159

verified ·

1 Parent(s): 4236885

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -18

app.py CHANGED Viewed

@@ -181,10 +181,14 @@ def get_model() -> WhisperModel:
 def build_transcription_params(mode: str):
     params = {
         "task": "transcribe",
-        "beam_size": 5,
-        "best_of": 5,
         "temperature": 0.0,
     }
@@ -196,6 +200,7 @@ def build_transcription_params(mode: str):
             "Write in a formal clinical style."
         )
     else:
         params["language"] = None
     return params
@@ -375,17 +380,19 @@ HTTP API for Whisper (via faster-whisper) with:
 - Multi-file audio upload (including .dct where supported by ffmpeg)
 - Password-protected ZIP upload (default password: dietcoke1)
 - Option to ONLY extract ZIP and list audio names (no transcription)
 - Medical-biased transcription mode
 - Persistent word/phrase memory (replacements)
 - Extraction & saving of frequent 'medical terms' from transcripts
 - Combined transcript + DOCX export
 If a .dct file uses a proprietary codec that ffmpeg cannot decode,
 you will get a clear error suggesting to convert to WAV/MP3 first.
 Use `/docs` for Swagger UI and `/ui` for the web interface.
 """,
-    version="2.4.0",
 )
@@ -430,6 +437,7 @@ def self_test():
                 "memory_rules": num_rules,
                 "medical_terms_count": med_count,
                 "zip_default_password": DEFAULT_ZIP_PASSWORD,
             }
         )
     except Exception as e:
@@ -587,7 +595,7 @@ def zip_extract_only(
     )
-# ---------- 4. ZIP transcription (JSON) ----------
 @app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
 def transcribe_zip(
@@ -648,7 +656,101 @@ def transcribe_zip(
     )
-# ---------- 5. ZIP transcription (DOCX) ----------
 @app.post("/api/transcribe/zip/docx")
 def transcribe_zip_docx(
@@ -708,11 +810,6 @@ def transcribe_zip_docx(
 # ===================== MEMORY ENDPOINTS =====================
-class MemoryRule(BaseModel):
-    source: str
-    target: str
 @app.get("/api/memory", response_model=MemoryResponse)
 def get_memory():
     mem = load_memory()
@@ -998,7 +1095,7 @@ HTML_UI = r"""<!DOCTYPE html>
     <h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
     <p>
       Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
-      and memory of preferred terms + collected medical vocabulary.
       Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
     </p>
   </header>
@@ -1106,13 +1203,19 @@ HTML_UI = r"""<!DOCTYPE html>
             <div class="btn-row">
               <button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
-              <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON</button>
-              <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX</button>
             </div>
           </div>
           <div class="col">
             <h3>ZIP combined transcript</h3>
-            <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON button."></textarea>
           </div>
         </div>
       </div>
@@ -1179,6 +1282,15 @@ HTML_UI = r"""<!DOCTYPE html>
   -F "mode=medical_en" \
   -F "extract_terms=true" \
   -F "files=@path/to/audio1.flac"</code></pre>
       </div>
     </div>
   </main>
@@ -1424,6 +1536,7 @@ HTML_UI = r"""<!DOCTYPE html>
     document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";
       if (!zipInput.files.length) {
         alert("Please choose a ZIP file.");
@@ -1440,8 +1553,29 @@ HTML_UI = r"""<!DOCTYPE html>
       try {
         const data = await postForm("/api/zip/extract", formData, false);
         const count = data.count || (data.audio_files ? data.audio_files.length : 0);
-        const names = (data.audio_files || []).join(", ");
-        setStatus("Extracted " + count + " audio file(s) from ZIP: " + names);
         finishProgress("ZIP extraction complete");
       } catch (err) {
         console.error(err);
@@ -1450,7 +1584,7 @@ HTML_UI = r"""<!DOCTYPE html>
       }
     });
-    // ZIP JSON
     document.getElementById("btn_zip_json").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";
@@ -1491,7 +1625,57 @@ HTML_UI = r"""<!DOCTYPE html>
       }
     });
-    // ZIP DOCX
     document.getElementById("btn_zip_docx").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";

 def build_transcription_params(mode: str):
+    """
+    Fast, CPU-friendly settings:
+    - greedy decoding (beam_size=1, best_of=1)
+    """
     params = {
         "task": "transcribe",
+        "beam_size": 1,   # was 5 → faster
+        "best_of": 1,     # was 5 → faster
         "temperature": 0.0,
     }
             "Write in a formal clinical style."
         )
     else:
+        # leave language autodetect for general mode
         params["language"] = None
     return params
 - Multi-file audio upload (including .dct where supported by ffmpeg)
 - Password-protected ZIP upload (default password: dietcoke1)
 - Option to ONLY extract ZIP and list audio names (no transcription)
+- NEW: ZIP → choose selected files to transcribe
 - Medical-biased transcription mode
 - Persistent word/phrase memory (replacements)
 - Extraction & saving of frequent 'medical terms' from transcripts
 - Combined transcript + DOCX export
+- Fast greedy decoding for CPU (beam_size=1, best_of=1)
 If a .dct file uses a proprietary codec that ffmpeg cannot decode,
 you will get a clear error suggesting to convert to WAV/MP3 first.
 Use `/docs` for Swagger UI and `/ui` for the web interface.
 """,
+    version="2.5.0",
 )
                 "memory_rules": num_rules,
                 "medical_terms_count": med_count,
                 "zip_default_password": DEFAULT_ZIP_PASSWORD,
+                "decoding": "fast (beam_size=1, best_of=1)",
             }
         )
     except Exception as e:
     )
+# ---------- 4. ZIP transcription (JSON) – ALL FILES ----------
 @app.post("/api/transcribe/zip", response_model=TranscriptionResponse)
 def transcribe_zip(
     )
+# ---------- 5. ZIP transcription (JSON) – SELECTED FILES ONLY ----------
+@app.post("/api/transcribe/zip/selected", response_model=TranscriptionResponse)
+def transcribe_zip_selected(
+    file: UploadFile = File(..., description="ZIP file containing audio files"),
+    password: str = Form(
+        "",
+        description="ZIP password. Leave blank to use default 'dietcoke1'.",
+    ),
+    selected_files: str = Form(
+        "",
+        description="Comma-separated file names (inside ZIP) to transcribe",
+    ),
+    mode: Literal["general", "medical_en"] = Form("medical_en"),
+    extract_terms: bool = Form(False),
+):
+    """
+    Extract ZIP, then ONLY transcribe the subset of files whose basenames are
+    passed in 'selected_files' (comma-separated).
+    """
+    if file is None:
+        raise HTTPException(status_code=400, detail="No ZIP uploaded.")
+    effective_password = password if password else DEFAULT_ZIP_PASSWORD  # blank password falls back to the default
+    selected_set = {  # requested names: trimmed, empty entries dropped, duplicates collapsed
+        name.strip()
+        for name in (selected_files or "").split(",")  # NOTE: a file name that itself contains a comma cannot be selected
+        if name.strip()
+    }
+    if not selected_set:
+        raise HTTPException(
+            status_code=400,
+            detail="No selected_files provided. Please choose at least one file from the ZIP.",
+        )
+    extracted_paths = extract_zip_to_temp(file, effective_password)  # NOTE(review): cleanup of the extracted files is not visible here - confirm
+    audio_paths = filter_audio_files(extracted_paths)
+    if not audio_paths:
+        raise HTTPException(
+            status_code=400,
+            detail=(
+                "No valid audio files in ZIP. "
+                f"Supported extensions: {', '.join(AUDIO_EXTENSIONS)}"
+            ),
+        )
+    # Map basename -> extracted path for the selected audio files only
+    name_to_path = {}
+    for p in audio_paths:
+        base = os.path.basename(p)  # matching is by basename, so directories inside the ZIP are ignored
+        if base in selected_set:
+            name_to_path[base] = p  # a basename that occurs more than once keeps the last path seen
+    if not name_to_path:  # selected names missing from the ZIP are skipped silently unless none match
+        raise HTTPException(
+            status_code=400,
+            detail="None of the selected_files were found as audio inside the ZIP.",
+        )
+    items: List[FileTranscript] = []
+    try:
+        # Transcribe in alphabetical file-name order (sorted), NOT in the order the user listed the names
+        for fname in sorted(name_to_path.keys()):
+            path = name_to_path[fname]
+            text = transcribe_file(path, mode)
+            items.append(FileTranscript(filename=fname, text=text))
+    except RuntimeError as e:  # only RuntimeError is translated; other exception types propagate unchanged
+        msg = str(e)
+        if "Audio decoder could not read file" in msg:  # decoder failures are reported as a client error (400)
+            raise HTTPException(status_code=400, detail=msg) from e
+        raise HTTPException(
+            status_code=500,
+            detail=f"Transcription failed (ZIP selected): {msg}",
+        ) from e
+    combined = format_combined(items)
+    filenames = [it.filename for it in items]
+    new_terms: List[str] = []
+    if extract_terms and combined:  # term extraction is skipped when the combined transcript is empty
+        new_terms = update_med_terms_from_text(combined)
+    return TranscriptionResponse(
+        mode=mode,
+        combined_transcript=combined,
+        items=items,
+        file_count=len(items),
+        audio_files=filenames,
+        new_medical_terms=new_terms,
+    )
+# ---------- 6. ZIP transcription (DOCX) – ALL FILES ----------
 @app.post("/api/transcribe/zip/docx")
 def transcribe_zip_docx(
 # ===================== MEMORY ENDPOINTS =====================
 @app.get("/api/memory", response_model=MemoryResponse)
 def get_memory():
     mem = load_memory()
     <h1>Whisper – Medical Batch Transcription (faster-whisper CPU)</h1>
     <p>
       Multi-file & ZIP transcription with medical mode, .dct support (where decodable), ZIP extract-only mode,
+      selectable ZIP files for transcription, and memory of preferred terms + collected medical vocabulary.
       Default ZIP password: <code>dietcoke1</code>. API docs: <code>/docs</code>.
     </p>
   </header>
             <div class="btn-row">
               <button class="btn-secondary" id="btn_zip_extract_only">Extract only & list audio files</button>
+              <button class="btn-primary" id="btn_zip_json">Transcribe ZIP → JSON (all files)</button>
+              <button class="btn-secondary" id="btn_zip_selected">Transcribe selected from ZIP → JSON</button>
+              <button class="btn-secondary" id="btn_zip_docx">Download ZIP DOCX (all files)</button>
+            </div>
+            <h3>Files inside ZIP (select to transcribe)</h3>
+            <div id="zip_file_list" class="small-hint">
+              Run "Extract only & list audio files" to see files and choose which ones to transcribe.
             </div>
           </div>
           <div class="col">
             <h3>ZIP combined transcript</h3>
+            <textarea id="zip_output" placeholder="Transcript will appear here when you use the JSON buttons."></textarea>
           </div>
         </div>
       </div>
   -F "mode=medical_en" \
   -F "extract_terms=true" \
   -F "files=@path/to/audio1.flac"</code></pre>
+        <h3>ZIP selected files JSON</h3>
+        <pre><code>curl -X POST \
+  "https://staraks-whisper-large-v3.hf.space/api/transcribe/zip/selected" \
+  -H "Accept: application/json" \
+  -F "mode=medical_en" \
+  -F "extract_terms=true" \
+  -F "selected_files=file1.wav,file3.dct" \
+  -F "file=@path/to/archive.zip"</code></pre>
       </div>
     </div>
   </main>
     document.getElementById("btn_zip_extract_only").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";
+      const listDiv = document.getElementById("zip_file_list");
       if (!zipInput.files.length) {
         alert("Please choose a ZIP file.");
       try {
         const data = await postForm("/api/zip/extract", formData, false);
         const count = data.count || (data.audio_files ? data.audio_files.length : 0);
+        const names = data.audio_files || [];
+        setStatus("Extracted " + count + " audio file(s) from ZIP.");
+        // populate selectable list
+        if (names.length) {
+          listDiv.innerHTML = "";
+          names.forEach(name => {
+            const safeId = "zip_choice_" + name.replace(/[^a-zA-Z0-9_\-]/g, "_");
+            const label = document.createElement("label");
+            label.style.display = "block";
+            const cb = document.createElement("input");
+            cb.type = "checkbox";
+            cb.value = name;
+            cb.id = safeId;
+            cb.checked = true;
+            label.appendChild(cb);
+            label.append(" " + name);
+            listDiv.appendChild(label);
+          });
+        } else {
+          listDiv.innerHTML = "No audio files found in ZIP.";
+        }
         finishProgress("ZIP extraction complete");
       } catch (err) {
         console.error(err);
       }
     });
+    // ZIP JSON – ALL FILES
     document.getElementById("btn_zip_json").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";
       }
     });
+    // ZIP JSON – SELECTED FILES ONLY
+    document.getElementById("btn_zip_selected").addEventListener("click", async () => {
+      const zipInput = document.getElementById("zip_input");
+      const pwd = document.getElementById("zip_password").value || "";
+      const mode = document.getElementById("zip_mode").value;
+      const extractTerms = document.getElementById("zip_extract_terms").checked;
+      const out = document.getElementById("zip_output");
+      const listDiv = document.getElementById("zip_file_list");
+      if (!zipInput.files.length) {
+        alert("Please choose a ZIP file.");
+        return;
+      }
+      const checkboxes = listDiv.querySelectorAll("input[type='checkbox']:checked");
+      if (!checkboxes.length) {
+        alert("Please select at least one file from the ZIP (use the checkboxes).");
+        return;
+      }
+      const names = Array.from(checkboxes).map(cb => cb.value);
+      const formData = new FormData();
+      formData.append("file", zipInput.files[0]);
+      formData.append("password", pwd);
+      formData.append("mode", mode);
+      formData.append("extract_terms", extractTerms ? "true" : "false");
+      formData.append("selected_files", names.join(","));
+      setStatus("Uploading ZIP and transcribing selected files only…");
+      out.value = "";
+      startSimulatedProgress("Uploading & transcribing selected ZIP files");
+      try {
+        const data = await postForm("/api/transcribe/zip/selected", formData, false);
+        out.value = data.combined_transcript || "";
+        const count = data.file_count || (data.items ? data.items.length : 0);
+        const transcribedNames = (data.audio_files || []).join(", ");
+        let extra = "";
+        if (data.new_medical_terms && data.new_medical_terms.length) {
+          extra = " New medical terms added: " + data.new_medical_terms.join(", ");
+        }
+        setStatus("Done. Transcribed " + count + " selected file(s) from ZIP: " + transcribedNames + extra);
+        finishProgress("Selected ZIP transcription complete");
+      } catch (err) {
+        console.error(err);
+        alert(err.message);
+        errorProgress(err.message || "Error during selected ZIP transcription.");
+      }
+    });
+    // ZIP DOCX – ALL FILES
     document.getElementById("btn_zip_docx").addEventListener("click", async () => {
       const zipInput = document.getElementById("zip_input");
       const pwd = document.getElementById("zip_password").value || "";