moderntranscribe

Sleeping

App Files Files Community

staraks committed on Nov 25, 2025

Commit

fff2ebd

verified ·

1 Parent(s): c7a9d02

Update app.py

Browse files

Files changed (1) hide show

app.py +715 -279

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # app.py
-# Whisper Transcriber - single / multi / zip options + memory
-# Compatible with gradio 3.x, whisper, pydub, pyzipper, python-docx, ffmpeg
 import os
 import sys
@@ -11,16 +11,17 @@ import subprocess
 import traceback
 import threading
 import re
 from difflib import get_close_matches
-from pathlib import Path
 from uuid import uuid4
-# immediate prints
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
-# third-party imports
 try:
     import gradio as gr
     import whisper
@@ -33,9 +34,9 @@ except Exception as e:
     raise
 # ---------- Config ----------
-MIN_WAV_SIZE = 1024
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
@@ -44,8 +45,9 @@ FFMPEG_CANDIDATES = [
     ("mulaw", 8000, 1),
 ]
 MODEL_CACHE = {}
-# ---------- Memory helpers ----------
 def load_memory():
     try:
         if os.path.exists(MEMORY_FILE):
@@ -78,7 +80,6 @@ def save_memory(mem):
 memory = load_memory()
-# ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
     "dx": "diagnosis",
@@ -92,7 +93,6 @@ MEDICAL_ABBREVIATIONS = {
     "adm": "admit",
     "disch": "discharge",
 }
 DRUG_NORMALIZATION = {
     "metformin": "Metformin",
     "aspirin": "Aspirin",
@@ -214,19 +214,24 @@ def save_as_word(text, filename=None):
     return filename
-def _ffmpeg_convert(input_path, out_path):
-    cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y", "-i", input_path, "-ar", "16000", "-ac", "1", out_path]
     try:
         proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
         if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
-            return True, proc.stdout + proc.stderr
         else:
             try:
                 if os.path.exists(out_path):
                     os.unlink(out_path)
             except Exception:
                 pass
-            return False, proc.stdout + proc.stderr
     except Exception as e:
         try:
             if os.path.exists(out_path):
@@ -242,6 +247,7 @@ def convert_to_wav_if_needed(input_path):
     if lower.endswith(".wav"):
         return input_path
     tmp = None
     try:
         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
@@ -255,25 +261,29 @@ def convert_to_wav_if_needed(input_path):
             except Exception:
                 pass
     except Exception:
         try:
             if tmp and os.path.exists(tmp.name):
                 os.unlink(tmp.name)
         except Exception:
             pass
-    # ffmpeg fallback attempts
     diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
     diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
     diagnostics = []
     for fmt, sr, ch in FFMPEG_CANDIDATES:
         out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         out_wav.close()
-        success, debug = _ffmpeg_convert(input_path, out_wav.name)
         diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
         if success:
             try:
                 with open(diag_log, "w", encoding="utf-8") as fh:
-                    fh.write("pydub auto error: fallback used\n\n")
                     fh.write("Diagnostics:\n")
                     fh.write("\n".join(diagnostics))
             except Exception:
@@ -286,6 +296,16 @@ def convert_to_wav_if_needed(input_path):
             except Exception:
                 pass
     try:
         with open(input_path, "rb") as fh:
             head = fh.read(512)
@@ -295,6 +315,8 @@ def convert_to_wav_if_needed(input_path):
     try:
         with open(diag_log, "w", encoding="utf-8") as fh:
             fh.write("Full diagnostics:\n\n")
             fh.write("\n\n".join(diagnostics))
     except Exception as e:
@@ -327,38 +349,61 @@ def safe_model_choices(prefer_default="small"):
 def get_whisper_model(name, device=None):
-    key = f"{name}|{device}"
-    if key not in MODEL_CACHE:
-        print(f"DEBUG: loading whisper model '{name}' (device={device})", flush=True)
         try:
-            if device and device != "auto":
-                MODEL_CACHE[key] = whisper.load_model(name, device=device)
             else:
-                MODEL_CACHE[key] = whisper.load_model(name)
         except TypeError:
-            MODEL_CACHE[key] = whisper.load_model(name)
-    return MODEL_CACHE[key]
-# ---------- ZIP extraction ----------
-def extract_zip_and_list(zip_path, zip_password=None):
-    temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_{uuid4().hex[:8]}")
-    os.makedirs(temp_extract_dir, exist_ok=True)
-    extracted = {}
     logs = []
     try:
         with pyzipper.ZipFile(zip_path, "r") as zf:
             if zip_password:
                 try:
                     zf.setpassword(zip_password.encode())
                 except Exception:
-                    # non-fatal
-                    pass
             for info in zf.infolist():
                 if info.is_dir():
                     continue
                 _, ext = os.path.splitext(info.filename)
-                if ext.lower() not in [".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac", ".dct", ".dat"]:
                     continue
                 try:
                     zf.extract(info, path=temp_extract_dir)
@@ -371,304 +416,658 @@ def extract_zip_and_list(zip_path, zip_password=None):
                 fullp = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
                 if not os.path.exists(fullp):
                     continue
-                key = os.path.basename(info.filename)
-                # ensure unique key
-                i = 1
-                original = key
-                while key in extracted:
-                    name_only, extn = os.path.splitext(original)
-                    i += 1
-                    key = f"{name_only} ({i}){extn}"
-                extracted[key] = fullp
                 logs.append(f"Extracted: {info.filename} -> {key}")
-        if not extracted:
             logs.append("No supported audio files found in ZIP.")
-    except pyzipper.BadZipFile:
-        logs.append("Invalid zip file.")
     except Exception as e:
-        logs.append(f"Zip extraction failed: {e}")
-    return extracted, "\n".join(logs)
-# ---------- Transcription operations ----------
-def transcribe_file(path, model_name="small", device_choice="auto", enable_memory=False):
     logs = []
     try:
         if not path:
-            return "", "No path", "\n".join(logs)
-        # normalize path
-        p = str(path)
-        logs.append(f"Converting: {p}")
         wav = convert_to_wav_if_needed(p)
-        logs.append(f"WAV ready: {wav}")
-        model = get_whisper_model(model_name, device=None if device_choice == "auto" else device_choice)
-        logs.append(f"Model loaded: {model_name}")
-        result = model.transcribe(wav)
-        text = result.get("text", "").strip()
-        if enable_memory:
-            text = memory_correct_text(text)
-        text = postprocess_transcript(text)
         if enable_memory:
             try:
-                update_memory_with_transcript(text)
-                logs.append("Memory updated")
             except Exception:
                 pass
-        # cleanup tmp wav if it was created
-        if wav and os.path.exists(wav) and not p.lower().endswith(".wav"):
             try:
                 os.unlink(wav)
             except Exception:
                 pass
-        return text, None, "\n".join(logs)
     except Exception as e:
         tb = traceback.format_exc()
-        return "", f"ERROR: {e}", tb
-# ---------- Batch orchestration ----------
-def batch_transcribe_from_paths(paths, model_name, device_name, enable_mem, merge_flag):
     logs = []
     transcripts = []
-    perfile_docx = []
-    errors = []
-    for idx, p in enumerate(paths, start=1):
-        logs.append(f"[{idx}/{len(paths)}] {p}")
-        text, err, lg = transcribe_file(p, model_name=model_name, device_choice=device_name, enable_memory=enable_mem)
-        logs.append(lg or "")
-        if err:
-            errors.append(f"{os.path.basename(p)}: {err}")
-            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: {err}\n")
         else:
-            transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
-        # create per-file docx
-        try:
-            safe_name = Path(p).stem
-            out_doc = os.path.join(tempfile.gettempdir(), f"{safe_name}_{uuid4().hex[:8]}.docx")
-            save_as_word(text or "", out_doc)
-            perfile_docx.append((os.path.basename(p), out_doc))
-        except Exception as e:
-            errors.append(f"Failed to write docx for {p}: {e}")
     combined = "\n\n".join(transcripts)
-    merged_doc = None
     if merge_flag:
         try:
-            merged_doc = os.path.join(tempfile.gettempdir(), f"merged_{uuid4().hex[:8]}.docx")
-            save_as_word(combined, merged_doc)
-            logs.append(f"Merged saved: {merged_doc}")
         except Exception as e:
             logs.append(f"Merge failed: {e}")
-    # zip per-file docx for download
-    zip_path = None
     try:
-        zip_path = os.path.join(tempfile.gettempdir(), f"perfiles_{uuid4().hex[:8]}.zip")
-        import zipfile
-        with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
-            for base, pth in perfile_docx:
-                arcname = Path(base).stem + ".docx"
-                zf.write(pth, arcname=arcname)
-        logs.append(f"Per-file ZIP created: {zip_path}")
-    except Exception as e:
-        logs.append(f"Failed to create per-file ZIP: {e}")
-        zip_path = None
-    # add errors to logs
-    if errors:
-        logs.append("Errors:")
-        logs.extend(errors)
-    # also list produced files
-    logs.append("Per-file outputs:")
-    for base, pth in perfile_docx:
-        logs.append(f" - {base} -> {Path(pth).name}")
-    return combined, "\n".join(logs), zip_path, merged_doc
-# ---------- UI building ----------
 print("DEBUG: building Gradio UI", flush=True)
 available_choices, default_choice = safe_model_choices(prefer_default="small")
 CSS = """
 :root{
   --accent:#4f46e5;
-  --muted:#9ca3af;
-  --card:#0b1220;
-  --bg:#071022;
-  --text:#e6eef8;
-  --transcript-bg:#071026;
   --transcript-color:#e6eef8;
 }
 body { background: var(--bg); color: var(--text); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
-.card { background: var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(0,0,0,0.4); }
 .small-note { color:var(--muted); font-size:12px;}
-.transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background: var(--transcript-bg); color: var(--transcript-color); padding:12px; border-radius:8px; min-height:160px; }
 """
-with gr.Blocks(title="Whisper Transcriber - single/multi/zip", css=CSS) as demo:
-    # set dark theme default via a tiny script
-    gr.HTML("""<script>document.documentElement.setAttribute('data-theme','dark');</script>""")
-    gr.Markdown("<h3 style='margin:6px 0'>Whisper Transcriber — Single / Multi / ZIP</h3>")
-    gr.Markdown("<div class='small-note'>Options: single file, multiple files, or ZIP (default ZIP password: <code>dietcoke1</code>). Memory available.</div>")
     with gr.Tabs():
-        # --- Single file tab ---
-        with gr.TabItem("Single file"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    single_audio = gr.File(label="Select audio file", file_count="single", type="filepath")
-                    model_select_single = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
-                    device_single = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
-                    mem_single = gr.Checkbox(label="Enable correction memory", value=False)
-                    single_transcribe_btn = gr.Button("Transcribe single file", variant="primary")
                 with gr.Column(scale=1):
-                    single_transcript = gr.Textbox(label="Transcript", lines=14, interactive=False)
                     single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
-                    single_doc_download = gr.File(label="Download .docx (single)")
-            def _single_run(file_path, model_name, device_choice, enable_mem):
-                if not file_path:
-                    return "", "No file selected.", None
-                path = file_path if isinstance(file_path, str) else (file_path.name if hasattr(file_path, "name") else str(file_path))
-                text, err, logs = transcribe_file(path, model_name=model_name, device_choice=device_choice, enable_memory=enable_mem)
-                if err:
-                    return text, logs, None
-                # write docx
-                try:
-                    out_doc = os.path.join(tempfile.gettempdir(), f"{Path(path).stem}_{uuid4().hex[:8]}.docx")
-                    save_as_word(text or "", out_doc)
-                except Exception as e:
-                    logs = (logs or "") + f"\nFailed to write docx: {e}"
-                    out_doc = None
-                return text, logs, out_doc
-            single_transcribe_btn.click(fn=_single_run, inputs=[single_audio, model_select_single, device_single, mem_single], outputs=[single_transcript, single_logs, single_doc_download])
-        # --- Multi-file tab ---
-        with gr.TabItem("Multi-file"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    multi_files = gr.File(label="Upload multiple audio files", file_count="multiple", type="filepath")
-                    model_multi = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
-                    device_multi = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
-                    mem_multi = gr.Checkbox(label="Enable correction memory", value=False)
-                    merge_multi = gr.Checkbox(label="Merge into single .docx (optional)", value=True)
-                    multi_run_btn = gr.Button("Start multi-file transcription", variant="primary")
                 with gr.Column(scale=1):
-                    multi_transcript = gr.Textbox(label="Combined Transcript", lines=14, interactive=False)
-                    multi_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
-                    multi_perfiles_zip = gr.File(label="Download per-file .docx ZIP (separate docs)", interactive=False)
-                    multi_merged_doc = gr.File(label="Download merged docx (if created)", interactive=False)
-            def _multi_run(files, model_name, device_choice, enable_mem, merge_flag):
-                if not files:
-                    return "", "No files uploaded.", None, None
-                paths = [str(f) for f in files] if isinstance(files, (list, tuple)) else [str(files)]
-                combined, logs, zip_path, merged_path = batch_transcribe_from_paths(paths, model_name, device_choice, enable_mem, merge_flag)
-                return combined, logs, zip_path, merged_path
-            multi_run_btn.click(fn=_multi_run, inputs=[multi_files, model_multi, device_multi, mem_multi, merge_multi], outputs=[multi_transcript, multi_logs, multi_perfiles_zip, multi_merged_doc])
-        # --- ZIP tab ---
-        with gr.TabItem("ZIP"):
-            with gr.Row():
-                with gr.Column(scale=1):
-                    zip_file = gr.File(label="Upload ZIP containing audio files", file_count="single", type="filepath")
-                    use_default_zip_pass = gr.Checkbox(label="Use default ZIP password (dietcoke1)", value=True)
-                    zip_password = gr.Textbox(label="ZIP password (override)", placeholder="If left empty and default checked, 'dietcoke1' will be used")
-                    model_zip = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
-                    device_zip = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
-                    mem_zip = gr.Checkbox(label="Enable correction memory", value=False)
-                    merge_zip = gr.Checkbox(label="Merge into single .docx (optional)", value=True)
-                    zip_extract_btn = gr.Button("Extract & Transcribe ZIP", variant="primary")
-                with gr.Column(scale=1):
-                    zip_extract_logs = gr.Textbox(label="Extraction & transcription logs", lines=12, interactive=False)
-                    zip_perfiles_zip = gr.File(label="Download per-file .docx ZIP", interactive=False)
-                    zip_merged_doc = gr.File(label="Download merged docx (if created)", interactive=False)
-            def _zip_run(zfile, use_default, pwd_override, model_name, device_choice, enable_mem, merge_flag):
-                if not zfile:
-                    return "No ZIP provided.", None, None
-                zip_path = zfile if isinstance(zfile, str) else (zfile.name if hasattr(zfile, "name") else str(zfile))
-                final_pwd = None
-                if use_default and (not pwd_override or pwd_override.strip() == ""):
-                    final_pwd = "dietcoke1"
-                elif pwd_override and pwd_override.strip():
-                    final_pwd = pwd_override.strip()
-                extracted_map, logs0 = extract_zip_and_list(zip_path, final_pwd)
-                logs_lines = [logs0]
-                if not extracted_map:
-                    return "\n".join(logs_lines), None, None
-                # transcribe in file order
-                paths = [extracted_map[k] for k in sorted(extracted_map.keys())]
-                combined, logs1, per_zip, merged_doc = batch_transcribe_from_paths(paths, model_name, device_choice, enable_mem, merge_flag)
-                logs_lines.append(logs1)
-                # final logs
-                return "\n\n".join(logs_lines), per_zip, merged_doc
-            zip_extract_btn.click(fn=_zip_run, inputs=[zip_file, use_default_zip_pass, zip_password, model_zip, device_zip, mem_zip, merge_zip], outputs=[zip_extract_logs, zip_perfiles_zip, zip_merged_doc])
-        # --- Memory tab ---
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="filepath")
-                    mem_import_btn = gr.Button("Import memory file")
-                    mem_add_text = gr.Textbox(label="Add single word or phrase", placeholder="Type word or phrase")
-                    mem_add_btn = gr.Button("Add to memory")
-                    mem_clear_btn = gr.Button("Clear memory")
-                    mem_view_btn = gr.Button("View memory")
                 with gr.Column(scale=1):
-                    mem_status = gr.Textbox(label="Memory status / preview", lines=14, interactive=False)
-            def _import_mem(uploaded):
-                if not uploaded:
-                    return "No file provided."
-                path = uploaded if isinstance(uploaded, str) else (uploaded.name if hasattr(uploaded, "name") else str(uploaded))
-                try:
-                    with open(path, "r", encoding="utf-8") as fh:
-                        raw = fh.read()
-                    parsed = None
-                    try:
-                        parsed = json.loads(raw)
-                    except Exception:
-                        parsed = None
-                    added = 0
-                    if isinstance(parsed, dict):
-                        with MEMORY_LOCK:
-                            for k, v in parsed.get("words", {}).items():
-                                memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + int(v)
-                                added += 1
-                            for k, v in parsed.get("phrases", {}).items():
-                                memory["phrases"][k] = memory["phrases"].get(k, 0) + int(v)
-                                added += 1
-                            save_memory(memory)
-                        return f"Imported memory JSON entries: {added}"
-                    # fallback to line-per-entry
-                    lines = [l.strip() for l in raw.splitlines() if l.strip()]
-                    with MEMORY_LOCK:
-                        for line in lines:
-                            if "," in line:
-                                k, c = line.split(",", 1)
-                                try:
-                                    cnt = int(c)
-                                except:
-                                    cnt = 1
-                                memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + cnt
-                            else:
-                                # short lines -> words, longer -> phrase
-                                if len(line.split()) <= 3:
-                                    memory["words"][line.lower()] = memory["words"].get(line.lower(), 0) + 1
-                                else:
-                                    memory["phrases"][line] = memory["phrases"].get(line, 0) + 1
-                            added += 1
-                        save_memory(memory)
-                    return f"Imported {added} entries from text."
-                except Exception as e:
-                    return f"Import failed: {e}"
-            def _add_mem(txt):
-                if not txt or not txt.strip():
                     return "No entry provided."
-                e = txt.strip()
                 with MEMORY_LOCK:
                     if len(e.split()) <= 3:
                         memory["words"][e.lower()] = memory["words"].get(e.lower(), 0) + 1
@@ -690,20 +1089,57 @@ with gr.Blocks(title="Whisper Transcriber - single/multi/zip", css=CSS) as demo:
                 w = memory.get("words", {})
                 p = memory.get("phrases", {})
                 out_lines = []
-                out_lines.append("WORDS (top 50):")
-                for k, v in sorted(w.items(), key=lambda kv: -kv[1])[:50]:
                     out_lines.append(f"{k}: {v}")
                 out_lines.append("")
-                out_lines.append("PHRASES (top 50):")
-                for k, v in sorted(p.items(), key=lambda kv: -kv[1])[:50]:
                     out_lines.append(f"{k}: {v}")
                 return "\n".join(out_lines)
-            mem_import_btn.click(fn=_import_mem, inputs=[mem_upload], outputs=[mem_status])
-            mem_add_btn.click(fn=_add_mem, inputs=[mem_add_text], outputs=[mem_status])
             mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
 # ---------- Launch ----------
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))

 # app.py
+# Whisper Transcriber — Gradio 3.x compatible full file
+# Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed
 import os
 import sys
 import traceback
 import threading
 import re
+import zipfile
 from difflib import get_close_matches
 from uuid import uuid4
+from pathlib import Path
+# Force unbuffered prints for logs
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
+# Third-party imports (ensure installed)
 try:
     import gradio as gr
     import whisper
     raise
 # ---------- Config ----------
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
+MIN_WAV_SIZE = 1024
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
     ("mulaw", 8000, 1),
 ]
 MODEL_CACHE = {}
+EXTRACT_MAP = {}  # friendly_name -> absolute path
+# ---------- Memory & postprocessing ----------
 def load_memory():
     try:
         if os.path.exists(MEMORY_FILE):
 memory = load_memory()
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
     "dx": "diagnosis",
     "adm": "admit",
     "disch": "discharge",
 }
 DRUG_NORMALIZATION = {
     "metformin": "Metformin",
     "aspirin": "Aspirin",
     return filename
+def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
+        cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
+        if fmt in ("s16le", "pcm_s16le", "mulaw"):
+            cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path]
+        else:
+            cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
         proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
+        stdout_stderr = (proc.stdout or "") + (proc.stderr or "")
         if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
+            return True, stdout_stderr
         else:
             try:
                 if os.path.exists(out_path):
                     os.unlink(out_path)
             except Exception:
                 pass
+            return False, stdout_stderr
     except Exception as e:
         try:
             if os.path.exists(out_path):
     if lower.endswith(".wav"):
         return input_path
+    auto_err = ""
     tmp = None
     try:
         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
             except Exception:
                 pass
     except Exception:
+        auto_err = traceback.format_exc()
         try:
             if tmp and os.path.exists(tmp.name):
                 os.unlink(tmp.name)
         except Exception:
             pass
+    # ffmpeg fallback
     diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
     diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
     diagnostics = []
     for fmt, sr, ch in FFMPEG_CANDIDATES:
         out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         out_wav.close()
+        success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
         diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
         if success:
             try:
                 with open(diag_log, "w", encoding="utf-8") as fh:
+                    fh.write("pydub auto error:\n")
+                    fh.write(auto_err + "\n\n")
+                    fh.write("Successful ffmpeg candidate:\n")
+                    fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
                     fh.write("Diagnostics:\n")
                     fh.write("\n".join(diagnostics))
             except Exception:
             except Exception:
                 pass
+    try:
+        fp = subprocess.run(
+            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+        diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
+    except Exception as e:
+        diagnostics.append("ffprobe failed: " + str(e))
     try:
         with open(input_path, "rb") as fh:
             head = fh.read(512)
     try:
         with open(diag_log, "w", encoding="utf-8") as fh:
+            fh.write("pydub auto error:\n")
+            fh.write(auto_err + "\n\n")
             fh.write("Full diagnostics:\n\n")
             fh.write("\n\n".join(diagnostics))
     except Exception as e:
 def get_whisper_model(name, device=None):
+    if name not in MODEL_CACHE:
+        print(f"DEBUG: loading whisper model '{name}'", flush=True)
         try:
+            if device:
+                MODEL_CACHE[name] = whisper.load_model(name, device=device)
             else:
+                MODEL_CACHE[name] = whisper.load_model(name)
         except TypeError:
+            MODEL_CACHE[name] = whisper.load_model(name)
+    return MODEL_CACHE[name]
+# ---------- SRT helper ----------
def segments_to_srt(segments):
    """Render transcription segments as SubRip (SRT) text.

    Args:
        segments: Iterable of mappings read with ``.get``. ``start`` and
            ``end`` are times in seconds (default 0); ``text`` is the caption
            (default ``""``, stripped of surrounding whitespace).

    Returns:
        The SRT document as one string. Cues are numbered from 1 and each cue
        is followed by an empty line. An empty input yields ``""``.
    """

    def fmt_time(t):
        # Round once to whole milliseconds, then split with integer maths.
        # Taking the fraction via ``t - int(t)`` and truncating loses a
        # millisecond to float error (1.001 s was rendered as ",000").
        # Negative times are clamped to zero so the fields stay well-formed.
        total_ms = max(0, int(round(float(t) * 1000)))
        hours, rest = divmod(total_ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        seconds, millis = divmod(rest, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    lines = []
    for index, seg in enumerate(segments, start=1):
        start = seg.get("start", 0)
        end = seg.get("end", 0)
        text = seg.get("text", "").strip()
        lines.extend(
            [
                str(index),
                f"{fmt_time(start)} --> {fmt_time(end)}",
                text,
                "",  # blank separator line between cues
            ]
        )
    return "\n".join(lines)
+# ---------- ZIP extraction (per-run dir) ----------
+def extract_zip_and_map(zip_path, zip_password=None):
+    global EXTRACT_MAP
+    EXTRACT_MAP = {}
+    run_id = uuid4().hex
+    temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
     logs = []
     try:
+        os.makedirs(temp_extract_dir, exist_ok=True)
         with pyzipper.ZipFile(zip_path, "r") as zf:
             if zip_password:
                 try:
                     zf.setpassword(zip_password.encode())
                 except Exception:
+                    logs.append("Warning: failed to set zip password (continuing).")
+            count = {}
+            supported = [".mp3", ".wav", ".aac", ".flac", ".ogg", ".m4a", ".dat", ".dct"]
             for info in zf.infolist():
                 if info.is_dir():
                     continue
                 _, ext = os.path.splitext(info.filename)
+                if ext.lower() not in supported:
                     continue
                 try:
                     zf.extract(info, path=temp_extract_dir)
                 fullp = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
                 if not os.path.exists(fullp):
                     continue
+                base = os.path.basename(info.filename)
+                key = base
+                if key in EXTRACT_MAP:
+                    idx = count.get(base, 1) + 1
+                    count[base] = idx
+                    name_only, extn = os.path.splitext(base)
+                    key = f"{name_only} ({idx}){extn}"
+                else:
+                    count[base] = 1
+                EXTRACT_MAP[key] = fullp
                 logs.append(f"Extracted: {info.filename} -> {key}")
+        if not EXTRACT_MAP:
             logs.append("No supported audio files found in ZIP.")
+            return [], "\n".join(logs)
+        friendly = sorted(EXTRACT_MAP.keys())
+        return friendly, "\n".join(logs)
+    except Exception as e:
+        traceback.print_exc()
+        try:
+            if os.path.exists(temp_extract_dir):
+                shutil.rmtree(temp_extract_dir)
+        except Exception:
+            pass
+        return [], f"Extraction failed: {e}"
+# ---------- Trim helper used in two-pass ----------
def trim_audio_segment(src_path, start_sec, end_sec):
    """Cut the span ``start_sec``..``end_sec`` of ``src_path`` into a temp WAV.

    ffmpeg is invoked with ``-ss``/``-to`` and the output is written at
    16000 Hz, 1 channel, to a freshly created ``.wav`` temp file.

    Returns:
        Path of the temp file holding the trimmed audio.

    Raises:
        Exception: ffmpeg exited non-zero, produced no file, or produced a
            file smaller than ``MIN_WAV_SIZE``. Any other error raised while
            running ffmpeg is re-raised unchanged. In every failure case the
            temp file is removed first (best effort).
    """
    source = str(src_path)
    handle = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    handle.close()
    clip_path = handle.name

    def _discard_output():
        # Best-effort removal of the partial output; never masks the real error.
        try:
            if os.path.exists(clip_path):
                os.unlink(clip_path)
        except Exception:
            pass

    try:
        ffmpeg_args = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
        ffmpeg_args += ["-ss", str(start_sec), "-to", str(end_sec)]
        ffmpeg_args += ["-i", source, "-ar", "16000", "-ac", "1", clip_path]
        proc = subprocess.run(ffmpeg_args, capture_output=True, timeout=30, text=True)
        produced = (
            proc.returncode == 0
            and os.path.exists(clip_path)
            and os.path.getsize(clip_path) >= MIN_WAV_SIZE
        )
        if not produced:
            raise Exception(f"ffmpeg trim failed: {proc.stderr or proc.stdout}")
    except Exception:
        _discard_output()
        raise
    return clip_path
+# ---------- Core transcription (single file, supports two-pass) ----------
def transcribe_single_file(
    path,
    model_name="small",
    device_choice="auto",
    enable_memory=False,
    generate_srt=False,
    use_two_pass=False,
    fast_model="small",
    refine_model=None,
    refine_threshold=-1.0,
):
    """Transcribe one audio file, optionally with a fast pass plus refinement.

    Single pass: ``model_name`` transcribes the whole file.

    Two pass (``use_two_pass=True``): ``fast_model`` transcribes the whole
    file, then every segment whose ``avg_logprob`` is below
    ``refine_threshold`` is cut out with ``trim_audio_segment`` and
    re-transcribed with ``refine_model`` (defaults to ``model_name``).

    Args:
        path: File path (``str`` / ``os.PathLike``) or an object exposing the
            path as ``.name``.
        model_name: Whisper model for the single pass / default refine model.
        device_choice: ``"auto"`` lets the loader choose; anything else is
            passed through as the device.
        enable_memory: Apply ``memory_correct_text`` and feed the final text
            back through ``update_memory_with_transcript``.
        generate_srt: Write an ``.srt`` file into the system temp directory.
        use_two_pass: Enable the fast-then-refine strategy.
        fast_model: Model used for the first pass in two-pass mode.
        refine_model: Model used for low-confidence segments.
        refine_threshold: ``avg_logprob`` cut-off for refinement.

    Returns:
        ``(text, srt_path, logs)``. ``srt_path`` is ``None`` when no SRT was
        written. On any error ``text`` is ``""`` and ``logs`` holds the
        message and traceback; with no ``path`` the result is
        ``(None, None, "No file provided.")``.
    """
    logs = []
    wav = None  # converted audio; removed in ``finally`` if it is a temp copy
    p = None
    try:
        if not path:
            return None, None, "No file provided."
        # Check real paths first: pathlib.Path also has ``.name`` (its
        # basename), which would otherwise discard the directory part.
        if isinstance(path, (str, os.PathLike)):
            p = str(path)
        elif hasattr(path, "name"):
            p = path.name
        else:
            p = str(path)
        device = None if device_choice == "auto" else device_choice
        srt_stem = os.path.splitext(os.path.basename(p))[0]

        def _write_srt(srt_segments, suffix=""):
            # Render first so a formatting error never leaves an empty file.
            srt_text = segments_to_srt(srt_segments)
            srt_fp = os.path.join(tempfile.gettempdir(), f"{srt_stem}{suffix}.srt")
            with open(srt_fp, "w", encoding="utf-8") as fh:
                fh.write(srt_text)
            logs.append(f"SRT generated: {srt_fp}")
            return srt_fp

        def _remember(final_text):
            # Memory is an optional nicety: report failures, never abort.
            try:
                update_memory_with_transcript(final_text)
                logs.append("Memory updated.")
            except Exception as exc:
                logs.append(f"Memory update failed: {exc}")

        if not use_two_pass:
            model = get_whisper_model(model_name, device=device)
            logs.append(f"Loaded model: {model_name}")
            wav = convert_to_wav_if_needed(p)
            logs.append(f"Converted to WAV: {os.path.basename(wav)}")
            result = model.transcribe(wav)
            text = result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
            text = postprocess_transcript(text)
            srt_path = None
            if generate_srt and result.get("segments"):
                srt_path = _write_srt(result["segments"])
            if enable_memory:
                _remember(text)
            return text, srt_path, "\n".join(logs)

        # ---- Two-pass ----
        if refine_model is None:
            refine_model = model_name
        logs.append(f"Two-pass enabled: fast_model={fast_model}, refine_model={refine_model}, threshold={refine_threshold}")
        fast = get_whisper_model(fast_model, device=device)
        logs.append(f"Loaded fast model: {fast_model}")
        wav = convert_to_wav_if_needed(p)
        logs.append(f"Converted to WAV: {os.path.basename(wav)}")
        fast_result = fast.transcribe(wav)
        segments = fast_result.get("segments") or []

        if not segments:
            # Nothing to refine and nothing to put in an SRT: return the
            # fast model's plain text.
            text = fast_result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
                _remember(text)
            text = postprocess_transcript(text)
            return text, None, "\n".join(logs)

        refined_segments = []
        segments_to_refine = []
        for seg in segments:
            seg_text = seg.get("text", "").strip()
            seg_copy = dict(seg)
            seg_copy["text"] = memory_correct_text(seg_text) if enable_memory else seg_text
            refined_segments.append(seg_copy)
            avg_lp = seg.get("avg_logprob", None)
            if avg_lp is None:
                continue
            try:
                low_confidence = float(avg_lp) < float(refine_threshold)
            except (TypeError, ValueError):
                # Non-numeric score or threshold: leave the segment as is.
                continue
            if low_confidence:
                segments_to_refine.append(seg_copy)
        logs.append(f"Fast pass: {len(segments)} segments, {len(segments_to_refine)} to refine.")

        if segments_to_refine:
            refine = get_whisper_model(refine_model, device=device)
            logs.append(f"Loaded refine model: {refine_model}")
            for seg in segments_to_refine:
                start = seg.get("start", 0.0)
                end = seg.get("end", start + seg.get("duration", 0.0))
                if end <= start:
                    continue
                seg_wav = None
                try:
                    seg_wav = trim_audio_segment(wav, start, end)
                    r_result = refine.transcribe(seg_wav)
                    new_text = r_result.get("text", "").strip()
                    if enable_memory:
                        new_text = memory_correct_text(new_text)
                    # ``seg`` is the very dict stored in refined_segments,
                    # so update it directly instead of searching by time.
                    seg["text"] = new_text
                    if r_result.get("segments"):
                        seg["avg_logprob"] = r_result["segments"][0].get("avg_logprob", seg.get("avg_logprob"))
                except Exception as e:
                    logs.append(f"Refine failed for {start}-{end}: {e}")
                finally:
                    # Remove the trimmed clip even when refinement failed.
                    if seg_wav and os.path.exists(seg_wav):
                        try:
                            os.unlink(seg_wav)
                        except OSError:
                            pass

        ordered = sorted(refined_segments, key=lambda s: s.get("start", 0.0))
        pieces = (s.get("text", "").strip() for s in ordered)
        combined_text = " ".join(piece for piece in pieces if piece)
        if enable_memory:
            combined_text = memory_correct_text(combined_text)
            _remember(combined_text)
        combined_text = postprocess_transcript(combined_text)

        srt_path = None
        if generate_srt:
            srt_segs = [
                {"start": s.get("start", 0.0), "end": s.get("end", 0.0), "text": s.get("text", "")}
                for s in ordered
            ]
            srt_path = _write_srt(srt_segs, "_two_pass")
        return combined_text, srt_path, "\n".join(logs)
    except Exception as e:
        tb = traceback.format_exc()
        return "", None, f"Transcription error: {e}\n{tb}"
    finally:
        # Runs on success and on failure, so the converted copy never leaks.
        # ``wav == p`` means no conversion happened and the input is kept.
        if wav and wav != p and os.path.exists(wav):
            try:
                os.unlink(wav)
            except OSError:
                pass
+# ---------- Batch transcribe ----------
def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
    """Transcribe every selected and uploaded file, one after another.

    Args:
        friendly_selected: Keys of ``EXTRACT_MAP`` chosen from an extracted
            ZIP. Unknown keys are reported in the logs and skipped.
        uploaded_files: One upload or a list/tuple of uploads. Each may be a
            path, a dict carrying the path under ``"name"``, or an object
            exposing it as ``.name``.
        model_name: Model for single pass; also passed as the refine model.
        device_name: Forwarded as ``device_choice``.
        merge_flag: When true, save the combined text with ``save_as_word``.
        enable_mem: Forwarded as ``enable_memory``.
        generate_srt: Forwarded to ``transcribe_single_file``.
        use_two_pass, fast_model, refine_threshold: Forwarded unchanged.

    Returns:
        ``(combined_text, logs, docx_path_or_None, first_srt_path_or_None)``.
        With nothing to process the result is
        ``("", "No files selected or uploaded.", None, None)``.
    """

    def _as_path(item):
        # str() on an upload wrapper or dict yields a repr, not a usable
        # path, so unwrap the known shapes first.
        if isinstance(item, (str, os.PathLike)):
            return str(item)
        if isinstance(item, dict) and item.get("name"):
            return item["name"]
        name = getattr(item, "name", None)
        return name if name else str(item)

    logs = []
    paths = []
    for key in friendly_selected or []:
        extracted = EXTRACT_MAP.get(key)
        if extracted:
            paths.append(extracted)
        else:
            logs.append(f"Warning: selected not found in extract map: {key}")
    if uploaded_files:
        if isinstance(uploaded_files, (list, tuple)):
            uploads = uploaded_files
        else:
            uploads = [uploaded_files]
        paths.extend(_as_path(item) for item in uploads)
    if not paths:
        return "", "No files selected or uploaded.", None, None

    transcripts = []
    srt_files = []
    total = len(paths)
    for idx, p in enumerate(paths, start=1):
        logs.append(f"[{idx}/{total}] Processing: {p}")
        text, srt_path, lg = transcribe_single_file(
            p,
            model_name=model_name,
            device_choice=device_name,
            enable_memory=enable_mem,
            generate_srt=generate_srt,
            use_two_pass=use_two_pass,
            fast_model=fast_model,
            refine_model=model_name,
            refine_threshold=refine_threshold,
        )
        logs.append(lg)
        # A missing transcript is rendered as empty, not the text "None".
        transcripts.append(f"FILE: {os.path.basename(p)}\n{text or ''}\n")
        if srt_path:
            srt_files.append(srt_path)

    combined = "\n\n".join(transcripts)
    out_doc = None
    if merge_flag:
        try:
            out_doc = save_as_word(combined)
            logs.append(f"Merged saved: {out_doc}")
        except Exception as e:
            logs.append(f"Merge failed: {e}")
    # Only the first SRT is returned: the UI exposes a single download slot.
    srt_return = srt_files[0] if srt_files else None
    return combined, "\n".join(logs), out_doc, srt_return
+# ---------- Robust multi-file memory importer ----------
def _read_file_text_try_encodings(path):
    """Read a text file whose encoding is unknown.

    The file is read once as bytes and decoded in this order:

    1. UTF-16, only when the data starts with a UTF-16 byte-order mark.
       Without a BOM almost any even-length byte string "decodes" as
       UTF-16, which would turn Latin-1 files into garbage.
    2. UTF-8 (a leading UTF-8 BOM is dropped).
    3. Latin-1, which accepts every byte sequence.

    Line endings are normalised to ``"\\n"``, as text-mode reading would do.

    Returns:
        ``(text, encoding_name)`` on success, ``(None, None)`` when the file
        cannot be opened or read.
    """
    try:
        with open(path, "rb") as fh:
            raw = fh.read()
    except (OSError, TypeError, ValueError):
        return None, None

    text = None
    used = None
    if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
        try:
            text, used = raw.decode("utf-16"), "utf-16"
        except UnicodeDecodeError:
            text = None
    if text is None:
        try:
            # utf-8-sig also handles BOM-less UTF-8.
            text, used = raw.decode("utf-8-sig"), "utf-8"
        except UnicodeDecodeError:
            text, used = raw.decode("latin-1"), "latin-1"
    return text.replace("\r\n", "\n").replace("\r", "\n"), used
def _process_single_memory_text(text):
    """Merge the contents of one imported file into the ``memory`` dict.

    Two formats are accepted:

    * JSON object: ``{"words": {word: count}, "phrases": {phrase: count}}``.
      Word keys are lower-cased; counts that are not integers count as 1.
      A section that is not an object is ignored.
    * Plain text, one entry per non-blank line. ``entry,count`` (the part
      after the last comma is an integer) adds ``count`` to the word
      ``entry``. Any other line is stored whole: as a word when it has at
      most three whitespace-separated tokens, otherwise as a phrase.

    Returns:
        Number of entries merged (not the sum of their counts).
    """
    if not text:
        return 0

    def _as_count(value):
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return 1

    try:
        parsed = json.loads(text)
    except (TypeError, ValueError):
        parsed = None

    added = 0
    if isinstance(parsed, dict):
        # Validate before taking the lock: a malformed section must neither
        # half-apply nor make the raw JSON fall through to the line parser.
        json_words = parsed.get("words", {})
        json_phrases = parsed.get("phrases", {})
        if not isinstance(json_words, dict):
            json_words = {}
        if not isinstance(json_phrases, dict):
            json_phrases = {}
        with MEMORY_LOCK:
            words = memory.setdefault("words", {})
            phrases = memory.setdefault("phrases", {})
            for k, v in json_words.items():
                key = k.lower()
                words[key] = words.get(key, 0) + _as_count(v)
                added += 1
            for k, v in json_phrases.items():
                phrases[k] = phrases.get(k, 0) + _as_count(v)
                added += 1
        return added

    # Fallback: plain text, one entry per line.
    with MEMORY_LOCK:
        words = memory.setdefault("words", {})
        phrases = memory.setdefault("phrases", {})
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            head, sep, tail = line.rpartition(",")
            head = head.strip()
            tail = tail.strip()
            count = None
            if sep and head:
                if not tail:
                    count = 1  # "word," with the count left blank
                else:
                    try:
                        count = int(tail)
                    except ValueError:
                        # The comma belongs to the phrase itself.
                        count = None
            if count is not None:
                key = head.lower()
                words[key] = words.get(key, 0) + count
            elif len(line.split()) <= 3:
                key = line.lower()
                words[key] = words.get(key, 0) + 1
            else:
                phrases[line] = phrases.get(line, 0) + 1
            added += 1
    return added
def _decode_memory_bytes(raw):
    """Decode bytes as BOM-marked UTF-16, else UTF-8, else Latin-1."""
    # UTF-16 is attempted only with a BOM: BOM-less guessing "succeeds" on
    # most even-length byte strings and yields garbage.
    if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
        try:
            return raw.decode("utf-16")
        except UnicodeDecodeError:
            pass
    try:
        return raw.decode("utf-8-sig")
    except UnicodeDecodeError:
        return raw.decode("latin-1")


def _normalize_memory_uploads(uploaded_files):
    """Return the uploads as a list of path strings, or None if unusable."""

    def _one(item):
        if isinstance(item, (str, os.PathLike)):
            return str(item)
        if isinstance(item, dict):
            return item.get("name") or None
        return getattr(item, "name", None) or None

    if isinstance(uploaded_files, (list, tuple)):
        # Entries that cannot be interpreted are dropped.
        return [path for path in map(_one, uploaded_files) if path]
    single = _one(uploaded_files)
    return [single] if single else None


def import_memory_files(uploaded_files):
    """Import correction-memory entries from uploaded files.

    Args:
        uploaded_files: One upload or a list/tuple of uploads. Each may be a
            path, a dict carrying the path under ``"name"``, or an object
            exposing it as ``.name``. Supported contents are plain text,
            JSON, and ZIP archives whose members are text/JSON.

    Returns:
        A multi-line status string: total entries added, per-file details,
        and anything that was skipped or failed.
    """
    if not uploaded_files:
        return "No files provided."
    paths = _normalize_memory_uploads(uploaded_files)
    if paths is None:
        return "Unable to interpret uploaded files."

    total_added = 0
    skipped = []
    messages = []
    for fp in paths:
        try:
            if not os.path.exists(fp):
                messages.append(f"Skipped missing: {fp}")
                continue
            if fp.lower().endswith(".zip"):
                try:
                    with zipfile.ZipFile(fp, "r") as zf:
                        for info in zf.infolist():
                            if info.is_dir():
                                continue
                            name = info.filename
                            # One unreadable member must not abort the rest
                            # of the archive.
                            try:
                                text = _decode_memory_bytes(zf.read(info))
                                added = _process_single_memory_text(text)
                            except Exception as exc:
                                skipped.append(f"{os.path.basename(fp)}:{name}: {exc}")
                                continue
                            total_added += added
                            messages.append(f"Imported {added} from ZIP member {name}")
                    messages.append(f"Processed ZIP: {os.path.basename(fp)}")
                except zipfile.BadZipFile:
                    messages.append(f"Bad zip: {fp}")
                continue
            # Otherwise read as text, trying several encodings.
            text, used_enc = _read_file_text_try_encodings(fp)
            if text is None:
                skipped.append(fp)
                continue
            added = _process_single_memory_text(text)
            total_added += added
            messages.append(f"Imported {added} from {os.path.basename(fp)} (enc={used_enc})")
        except Exception as e:
            skipped.append(f"{fp}: {e}")

    try:
        save_memory(memory)
    except Exception as exc:
        # Entries are merged in RAM but were not persisted: tell the user.
        skipped.append(f"Saving memory failed: {exc}")

    summary_lines = [f"Total entries added: {total_added}"]
    if messages:
        summary_lines.append("Details:")
        summary_lines.extend(messages)
    if skipped:
        summary_lines.append("Skipped/failed:")
        summary_lines.extend(skipped)
    return "\n".join(summary_lines)
+# ---------- Build Gradio UI (3.x compatible) ----------
 print("DEBUG: building Gradio UI", flush=True)
 available_choices, default_choice = safe_model_choices(prefer_default="small")
 CSS = """
 :root{
   --accent:#4f46e5;
+  --muted:#6b7280;
+  --card:#ffffff;
+  --bg:#f7f8fb;
+  --text:#0f172a;
+  --transcript-bg:#0f172a;
   --transcript-color:#e6eef8;
 }
+[data-theme="dark"] {
+  --accent: #7c3aed;
+  --muted: #9ca3af;
+  --card: #0b1220;
+  --bg: #071022;
+  --text: #e6eef8;
+  --transcript-bg: #071026;
+  --transcript-color: #e6eef8;
+}
 body { background: var(--bg); color: var(--text); font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
+.header { padding: 14px; border-radius: 10px; background: linear-gradient(90deg, rgba(79,70,229,0.08), rgba(99,102,241,0.02)); margin-bottom: 12px; display:flex;align-items:center;gap:12px; }
+.app-icon { width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,var(--accent),#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px; }
+.card { background:var(--card); border-radius:10px; padding:12px; box-shadow: 0 6px 20px rgba(16,24,40,0.04); }
+.transcript-area { white-space:pre-wrap; font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, "Roboto Mono", monospace; background: var(--transcript-bg); color: var(--transcript-color); padding:12px; border-radius:8px; min-height:200px; }
 .small-note { color:var(--muted); font-size:12px;}
 """
+with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
+    # Theme initializer + toggle injected via HTML (works across gradio versions)
+    gr.HTML("""
+    <script>
+    (function() {
+      try {
+        const saved = localStorage.getItem('wt_theme');
+        if (saved) {
+          document.documentElement.setAttribute('data-theme', saved);
+        } else {
+          document.documentElement.setAttribute('data-theme', 'dark');
+        }
+      } catch (e) { console.warn('theme init failed', e); }
+    })();
+    </script>
+    """)
+    # Header
+    with gr.Row():
+        with gr.Column(scale=0):
+            gr.HTML("<div style='width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,#4f46e5,#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px;'>WT</div>")
+        with gr.Column():
+            gr.Markdown("<h3 style='margin:0'>Whisper Transcriber (Gradio 3.x)</h3>")
+            gr.Markdown("<div class='small-note'>Two-pass speedup, per-run ZIP extraction, memory corrections, SRT export, dark theme default</div>")
     with gr.Tabs():
+        # Single audio
+        with gr.TabItem("Audio Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Input")
+                    single_audio = gr.Audio(label="Upload or record audio", type="filepath")
+                    model_select = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
+                    device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
+                    mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
+                    srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
+                    use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
+                    fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
+                    refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
+                    transcribe_btn = gr.Button("Transcribe", variant="primary")
                 with gr.Column(scale=1):
+                    gr.Markdown("### Output")
+                    audio_preview = gr.Audio(interactive=False)
+                    transcript_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
+                    srt_download = gr.File(label="SRT (if generated)")
                     single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
+            def _single_action(audio_file, model_name, device, mem_on, srt_on, use_two_pass_flag, fast_model, refine_thresh):
+                if not audio_file:
+                    return None, "", None, "No audio provided."
+                path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
+                text, srt_path, logs = transcribe_single_file(
+                    path,
+                    model_name=model_name,
+                    device_choice=device,
+                    enable_memory=mem_on,
+                    generate_srt=srt_on,
+                    use_two_pass=use_two_pass_flag,
+                    fast_model=fast_model,
+                    refine_model=model_name,
+                    refine_threshold=refine_thresh,
+                )
+                preview = audio_file
+                return preview, text, srt_path, logs
+            transcribe_btn.click(
+                fn=_single_action,
+                inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, use_two_pass_single, fast_model_choice, refine_threshold_single],
+                outputs=[audio_preview, transcript_out, srt_download, single_logs],
+            )
+        # Batch tab
+        with gr.TabItem("Batch Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Batch input")
+                    batch_files = gr.File(label="Upload audio files (optional)", file_count="multiple", type="filepath")
+                    batch_zip = gr.File(label="Or upload ZIP with audio (optional)", file_count="single", type="filepath")
+                    zip_password = gr.Textbox(label="ZIP password (optional)")
+                    batch_extract_btn = gr.Button("Extract ZIP & List files")
+                    batch_extract_logs = gr.Textbox(label="Extraction logs", lines=6, interactive=False)
+                    batch_select = gr.CheckboxGroup(choices=[], label="Select extracted files", interactive=True)
+                    batch_model = gr.Dropdown(choices=available_choices, value=default_choice, label="Model")
+                    batch_device = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
+                    batch_merge = gr.Checkbox(label="Merge transcripts to DOCX", value=True)
+                    batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
+                    batch_srt = gr.Checkbox(label="Generate SRT(s)", value=False)
+                    batch_use_two_pass = gr.Checkbox(label="Use two-pass speedup", value=False)
+                    batch_fast_model = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
+                    batch_refine_threshold = gr.Number(value=-1.0, label="Refine threshold", precision=2)
+                    batch_run_btn = gr.Button("Start Batch Transcription", variant="primary")
                 with gr.Column(scale=1):
+                    gr.Markdown("### Batch Output")
+                    batch_trans_out = gr.Textbox(label="Transcript (combined)", lines=16, interactive=False)
+                    batch_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
+                    batch_doc_download = gr.File(label="Merged DOCX (if created)")
+                    batch_srt_download = gr.File(label="First SRT (if any)")
+            def _do_extract(zip_file, password):
+                if not zip_file:
+                    return gr.update(choices=[]), "No ZIP provided."
+                zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
+                friendly, logs = extract_zip_and_map(zip_path, password)
+                return gr.update(choices=friendly), logs
+            batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
+            def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag, use_two_pass_flag, fast_model, refine_thresh):
+                combined, logs, out_doc, srt_path = batch_transcribe(
+                    friendly_selected,
+                    uploaded_files,
+                    model_name,
+                    device,
+                    merge_flag,
+                    mem_flag,
+                    srt_flag,
+                    use_two_pass=use_two_pass_flag,
+                    fast_model=fast_model,
+                    refine_threshold=refine_thresh,
+                )
+                return combined, logs, out_doc, srt_path
+            batch_run_btn.click(
+                fn=_do_batch,
+                inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_threshold],
+                outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
+            )
+        # Memory tab (updated to accept multiple files or zips)
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    gr.Markdown("### Correction Memory")
+                    mem_upload = gr.File(label="Import memory files (text/JSON/zip). You may select multiple files", file_count="multiple", type="filepath")
+                    mem_import_btn = gr.Button("Import memory files")
+                    mem_text = gr.Textbox(label="Add word/phrase", placeholder="Type word or phrase")
+                    mem_add_btn = gr.Button("Add to Memory")
+                    mem_clear_btn = gr.Button("Clear Memory")
+                    mem_view_btn = gr.Button("View Memory")
+                    mem_status = gr.Textbox(label="Memory status / preview", lines=12, interactive=False)
                 with gr.Column(scale=1):
+                    gr.Markdown("### Memory controls")
+                    gr.Markdown("- JSON format: {\"words\": {\"word\": count}, \"phrases\": {\"phrase\": count}}")
+                    gr.Markdown("- Plain text: one word/phrase per line or `word,count` per line")
+                    gr.Markdown("- ZIP files: will be scanned and any text/JSON files imported")
+            mem_import_btn.click(fn=import_memory_files, inputs=[mem_upload], outputs=[mem_status])
+            def _add_mem(entry):
+                if not entry or not entry.strip():
                     return "No entry provided."
+                e = entry.strip()
                 with MEMORY_LOCK:
                     if len(e.split()) <= 3:
                         memory["words"][e.lower()] = memory["words"].get(e.lower(), 0) + 1
                 w = memory.get("words", {})
                 p = memory.get("phrases", {})
                 out_lines = []
+                out_lines.append("WORDS (top 30):")
+                for k, v in sorted(w.items(), key=lambda kv: -kv[1])[:30]:
                     out_lines.append(f"{k}: {v}")
                 out_lines.append("")
+                out_lines.append("PHRASES (top 20):")
+                for k, v in sorted(p.items(), key=lambda kv: -kv[1])[:20]:
                     out_lines.append(f"{k}: {v}")
                 return "\n".join(out_lines)
+            mem_add_btn.click(fn=_add_mem, inputs=[mem_text], outputs=[mem_status])
             mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
+        # Settings tab (theme toggle via injected HTML)
+        with gr.TabItem("Settings"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("### Runtime & tips")
+                    gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
+                    gr.Markdown("- Extraction writes to a per-run temp directory under system temp.")
+                    gr.Markdown("- Two-pass helps when heavy model is slow.")
+                with gr.Column():
+                    gr.Markdown("### Theme")
+                    gr.HTML("""
+                    <div style="display:flex;gap:8px;align-items:center;">
+                      <button id="wt_theme_btn" style="padding:8px 12px;border-radius:8px;border:1px solid rgba(0,0,0,0.06);background:var(--card);cursor:pointer;">
+                        Toggle Dark / Light Theme
+                      </button>
+                      <span style="color:var(--muted);font-size:13px;">Theme preference saved in browser</span>
+                    </div>
+                    <script>
+                    (function(){
+                      try {
+                        const root = document.documentElement;
+                        const btn = document.getElementById('wt_theme_btn');
+                        try {
+                          const saved = localStorage.getItem('wt_theme');
+                          if (saved) root.setAttribute('data-theme', saved);
+                        } catch(e){}
+                        btn.addEventListener('click', function(){
+                          try {
+                            const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
+                            root.setAttribute('data-theme', cur);
+                            try { localStorage.setItem('wt_theme', cur); } catch(e){}
+                          } catch(e){ console.error(e); }
+                        });
+                      } catch(e){}
+                    })();
+                    </script>
+                    """)
 # ---------- Launch ----------
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))