staraks committed on
Commit
2234d16
·
verified ·
1 Parent(s): 847997b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +259 -143
app.py CHANGED
@@ -1,10 +1,11 @@
1
  # app.py
2
  # Whisper Transcriber — Gradio 3.x compatible full file
3
- # Features added: chunk size control, experimental parallel chunk transcription (CPU-only),
4
- # streaming progress bar (no audio preview), memory corrections, ZIP extraction, theme toggle.
 
 
5
  #
6
  # Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed.
7
- # Experimental parallel mode uses multiprocessing and loads the 'fast' model in each worker.
8
 
9
  import os
10
  import sys
@@ -19,6 +20,10 @@ from difflib import get_close_matches
19
  from uuid import uuid4
20
  from pathlib import Path
21
  from multiprocessing import get_context
 
 
 
 
22
  from typing import Tuple, List
23
 
24
  # Force unbuffered prints for logs
@@ -52,9 +57,12 @@ FFMPEG_CANDIDATES = [
52
  MODEL_CACHE = {}
53
  EXTRACT_MAP = {} # friendly_name -> absolute path
54
 
 
 
 
 
55
  # ---------- Worker-global for multiprocessing ----------
56
- # These are defined for worker processes (initialized via initializer)
57
- WORKER_MODEL = None # type: ignore
58
 
59
  def worker_init(model_name: str, device: str):
60
  """
@@ -503,7 +511,7 @@ def trim_audio_segment(src_path, start_sec, end_sec):
503
  pass
504
  raise
505
 
506
- # ---------- Core transcription (single file) ----------
507
  def transcribe_single_file(
508
  path,
509
  model_name="small",
@@ -515,7 +523,6 @@ def transcribe_single_file(
515
  refine_model=None,
516
  refine_threshold=-1.0,
517
  ):
518
- # non-streaming convenience helper used for batch mode
519
  logs = []
520
  try:
521
  if not path:
@@ -554,64 +561,185 @@ def transcribe_single_file(
554
  pass
555
  return text, srt_path, "\n".join(logs)
556
 
557
- # Two-pass path not used for streaming generator here
558
- return "", None, "Two-pass not used in this helper."
559
  except Exception as e:
560
  tb = traceback.format_exc()
561
  return "", None, f"Transcription error: {e}\n{tb}"
562
 
563
- # ---------- Batch transcribe (unchanged) ----------
564
- def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
565
- logs = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
566
  transcripts = []
 
567
  srt_files = []
568
- out_doc = None
569
  paths = []
 
570
  if friendly_selected:
571
  for key in friendly_selected:
572
  p = EXTRACT_MAP.get(key)
573
  if p:
574
  paths.append(p)
575
  else:
576
- logs.append(f"Warning: selected not found in extract map: {key}")
 
577
  if uploaded_files:
578
  if isinstance(uploaded_files, (list, tuple)):
579
  for f in uploaded_files:
580
  paths.append(str(f))
581
  else:
582
  paths.append(str(uploaded_files))
 
583
  if not paths:
584
- return "", "No files selected or uploaded.", None, None
 
 
585
 
586
  total = len(paths)
 
 
 
587
  for idx, p in enumerate(paths, start=1):
588
- logs.append(f"[{idx}/{total}] Processing: {p}")
589
- text, srt_path, lg = transcribe_single_file(
590
- p,
591
- model_name=model_name,
592
- device_choice=device_name,
593
- enable_memory=enable_mem,
594
- generate_srt=generate_srt,
595
- use_two_pass=use_two_pass,
596
- fast_model=fast_model,
597
- refine_model=model_name,
598
- refine_threshold=refine_threshold,
599
- )
600
- logs.append(lg)
601
- transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
602
- if srt_path:
603
- srt_files.append(srt_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  combined = "\n\n".join(transcripts)
 
605
  if merge_flag:
606
  try:
607
- out_doc = save_as_word(combined)
608
- logs.append(f"Merged saved: {out_doc}")
609
  except Exception as e:
610
- logs.append(f"Merge failed: {e}")
611
- srt_return = srt_files[0] if srt_files else None
612
- return combined, "\n".join(logs), out_doc, srt_return
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
- # ---------- Build Gradio UI (3.x compatible) ----------
615
  print("DEBUG: building Gradio UI", flush=True)
616
  available_choices, default_choice = safe_model_choices(prefer_default="small")
617
 
@@ -643,7 +771,7 @@ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui,
643
  """
644
 
645
  with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
646
- # Theme initializer + toggle injected via HTML (works across gradio versions)
647
  gr.HTML("""
648
  <script>
649
  (function() {
@@ -653,31 +781,22 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
653
  var chosen = null;
654
  if (saved === 'dark' || saved === 'light') {
655
  chosen = saved;
656
- } else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
657
- chosen = 'dark';
658
  } else {
659
- chosen = 'light';
 
660
  }
661
  document.documentElement.setAttribute('data-theme', chosen);
662
- try {
663
- var style = document.createElement('style');
664
- style.innerHTML = `
665
- :root, [data-theme="dark"] { transition: background-color 260ms ease, color 260ms ease; }
666
- `;
667
- document.head.appendChild(style);
668
- } catch(e){}
669
  } catch (e) { console.warn('theme init failed', e); }
670
  })();
671
  </script>
672
  """)
673
 
674
- # Header
675
  with gr.Row():
676
  with gr.Column(scale=0):
677
  gr.HTML("<div style='width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,#4f46e5,#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px;'>WT</div>")
678
  with gr.Column():
679
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber (Gradio 3.x)</h3>")
680
- gr.Markdown("<div class='small-note'>Chunked streaming, experimental CPU parallel, per-run ZIP extraction, memory corrections, SRT export, dark/light toggle</div>")
681
 
682
  with gr.Tabs():
683
  # Single audio
@@ -690,15 +809,12 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
690
  device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
691
  mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
692
  srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
693
- # chunk controls
694
- chunk_controls_row = gr.Row(visible=True)
695
  chunk_size_input = gr.Number(value=30, label="Chunk size (seconds)", precision=0)
696
  enable_chunking = gr.Checkbox(label="Enable chunking (recommended for long files)", value=True)
697
- # parallel experimental
698
  parallel_checkbox = gr.Checkbox(label="Enable experimental parallel chunk transcription (CPU only)", value=False)
699
  parallel_workers = gr.Slider(minimum=1, maximum=max(1, os.cpu_count() or 4), value=2, step=1, label="Parallel workers (processes)")
700
  use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
701
- fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model (for two-pass / workers)")
702
  refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
703
  transcribe_btn = gr.Button("Transcribe", variant="primary")
704
  with gr.Column(scale=1):
@@ -708,7 +824,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
708
  srt_download = gr.File(label="SRT (if generated)")
709
  single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
710
 
711
- # streaming generator with optional multiprocessing
712
  def _single_generator(audio_file, model_name, device, mem_on, srt_on, chunk_size_sec, chunking_enabled, parallel_enabled, workers, use_two_pass_flag, fast_model, refine_thresh):
713
  yield 0, "", None, "Starting..."
714
  try:
@@ -722,7 +837,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
722
  wav = convert_to_wav_if_needed(path)
723
  yield 8, "", None, f"Converted to WAV: {os.path.basename(wav)}"
724
 
725
- # determine duration
726
  duration = None
727
  try:
728
  p = subprocess.run(["ffprobe","-v","error","-show_entries","format=duration","-of","default=noprint_wrappers=1:nokey=1", wav], capture_output=True, text=True, timeout=8)
@@ -737,7 +851,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
737
  except Exception:
738
  duration = None
739
 
740
- # build chunk ranges
741
  if chunking_enabled and (duration and duration > chunk_size_sec * 1.5):
742
  num_chunks = max(1, int((duration + chunk_size_sec - 1) // chunk_size_sec))
743
  chunk_ranges = []
@@ -752,28 +865,23 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
752
 
753
  yield 10, "", None, f"Preparing transcription ({len(chunk_ranges)} chunk(s))..."
754
 
755
- # Load model in main process (for serial or orchestration)
756
  model = get_whisper_model(model_name, device=None if device == "auto" else device)
757
  yield 15, "", None, f"Model loaded: {model_name}"
758
 
759
  overall_parts = []
760
  total_chunks = len(chunk_ranges)
761
 
762
- # Decide whether we can/should run parallel workers
763
  parallel_used = False
764
  if parallel_enabled and chunking_enabled and total_chunks > 1:
765
  if device != "cpu" and device != "auto":
766
- # Most likely GPU requested; parallel across multiple processes with GPU not recommended
767
  yield 15, "", None, "Parallel mode requested but device is not 'cpu'. Falling back to serial chunking."
768
  parallel_used = False
769
  else:
770
- # attempt to spawn a multiprocessing pool that initializes each worker with fast_model on CPU
771
  try:
772
  ctx = get_context("spawn")
773
  worker_count = max(1, int(workers))
774
  yield 18, "", None, f"Starting parallel pool with {worker_count} workers (fast_model={fast_model})..."
775
  pool = ctx.Pool(processes=worker_count, initializer=worker_init, initargs=(fast_model, "cpu"))
776
- # prepare chunk WAVs
777
  chunk_paths = []
778
  temp_chunk_files = []
779
  for (st, ed) in chunk_ranges:
@@ -783,11 +891,9 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
783
  cw = trim_audio_segment(wav, st, ed)
784
  chunk_paths.append(cw)
785
  temp_chunk_files.append(cw)
786
- # map transcribe jobs
787
  results = pool.map(worker_transcribe_chunk, chunk_paths)
788
  pool.close()
789
  pool.join()
790
- # process results in order
791
  for idx, (txt, err) in enumerate(results, start=1):
792
  if err:
793
  yield int(20 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} worker error: {err}"
@@ -798,7 +904,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
798
  overall_parts.append(txt)
799
  prog = int(20 + idx * 70 / max(1, total_chunks))
800
  yield prog, "\n\n".join(overall_parts), None, f"Completed chunk {idx}/{total_chunks} (parallel)."
801
- # cleanup temp chunks (but not original wav)
802
  for tfile in temp_chunk_files:
803
  try:
804
  if os.path.exists(tfile):
@@ -811,7 +916,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
811
  parallel_used = False
812
 
813
  if not parallel_used:
814
- # serial chunk processing
815
  for idx, (st, ed) in enumerate(chunk_ranges, start=1):
816
  try:
817
  if ed is None:
@@ -823,9 +927,7 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
823
 
824
  yield int(15 + (idx - 1) * 70 / max(1, total_chunks)), "", None, f"Transcribing chunk {idx}/{total_chunks} ({note})..."
825
 
826
- # call model.transcribe on chunk
827
- whisper_opts = {}
828
- result = model.transcribe(chunk_wav, **whisper_opts)
829
  chunk_text = result.get("text", "").strip()
830
 
831
  if mem_on:
@@ -845,7 +947,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
845
  except Exception as e:
846
  yield int(15 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} failed: {e}\n{traceback.format_exc()}"
847
 
848
- # final assembly
849
  final_text = "\n\n".join([p for p in overall_parts if p])
850
  if mem_on:
851
  try:
@@ -853,7 +954,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
853
  except Exception:
854
  pass
855
 
856
- # SRT generation best-effort (runs a full transcribe to get segments)
857
  srt_path = None
858
  if srt_on:
859
  try:
@@ -869,7 +969,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
869
 
870
  yield 98, final_text, srt_path, "Transcription complete."
871
 
872
- # cleanup tmp wav if created
873
  try:
874
  if os.path.exists(wav) and not path.lower().endswith(".wav"):
875
  os.unlink(wav)
@@ -887,7 +986,7 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
887
  outputs=[progress_num, transcript_out, srt_download, single_logs],
888
  )
889
 
890
- # Batch tab (unchanged UI and behavior)
891
  with gr.TabItem("Batch Transcribe"):
892
  with gr.Row():
893
  with gr.Column(scale=1):
@@ -913,6 +1012,8 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
913
  batch_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
914
  batch_doc_download = gr.File(label="Merged DOCX (if created)")
915
  batch_srt_download = gr.File(label="First SRT (if any)")
 
 
916
 
917
  def _do_extract(zip_file, password):
918
  if not zip_file:
@@ -923,33 +1024,19 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
923
 
924
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
925
 
926
- def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag, use_two_pass_flag, fast_model, refine_thresh):
927
- combined, logs, out_doc, srt_path = batch_transcribe(
928
- friendly_selected,
929
- uploaded_files,
930
- model_name,
931
- device,
932
- merge_flag,
933
- mem_flag,
934
- srt_flag,
935
- use_two_pass=use_two_pass_flag,
936
- fast_model=fast_model,
937
- refine_threshold=refine_thresh,
938
- )
939
- return combined, logs, out_doc, srt_path
940
-
941
  batch_run_btn.click(
942
- fn=_do_batch,
943
  inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_threshold],
944
- outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
945
  )
946
 
947
- # Memory tab (unchanged)
948
  with gr.TabItem("Memory"):
949
  with gr.Row():
950
  with gr.Column(scale=1):
951
  gr.Markdown("### Correction Memory")
952
- mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="filepath")
 
953
  mem_import_btn = gr.Button("Import Memory")
954
  mem_text = gr.Textbox(label="Add word/phrase", placeholder="Type word or phrase")
955
  mem_add_btn = gr.Button("Add to Memory")
@@ -958,43 +1045,97 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
958
  mem_status = gr.Textbox(label="Memory status / preview", lines=12, interactive=False)
959
 
960
  def _import_mem(uploaded):
 
 
 
 
 
 
 
961
  if not uploaded:
962
- return "No file provided."
963
- path = uploaded.name if hasattr(uploaded, "name") else str(uploaded)
964
- try:
965
- with open(path, "r", encoding="utf-8") as fh:
966
- raw = fh.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  parsed = None
968
  try:
969
  parsed = json.loads(raw)
970
  except Exception:
971
  parsed = None
 
972
  if isinstance(parsed, dict):
973
  with MEMORY_LOCK:
974
- for k, v in parsed.get("words", {}).items():
975
- memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + int(v)
976
- for k, v in parsed.get("phrases", {}).items():
977
- memory["phrases"][k] = memory["phrases"].get(k, 0) + int(v)
 
 
 
 
 
 
 
 
 
 
978
  save_memory(memory)
979
- return f"Imported JSON memory (words={len(parsed.get('words', {}))}, phrases={len(parsed.get('phrases', {}))})."
 
 
 
980
  lines = [l.strip() for l in raw.splitlines() if l.strip()]
981
- added = 0
982
  with MEMORY_LOCK:
983
  for line in lines:
984
  if "," in line:
985
- k, c = line.split(",", 1)
 
986
  try:
987
- cnt = int(c)
988
- except:
989
  cnt = 1
990
- memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + cnt
 
991
  else:
992
- memory["words"][line.lower()] = memory["words"].get(line.lower(), 0) + 1
993
- added += 1
 
 
 
 
 
 
994
  save_memory(memory)
995
- return f"Imported {added} entries."
996
- except Exception as e:
997
- return f"Import failed: {e}"
 
 
 
 
998
 
999
  def _add_mem(entry):
1000
  if not entry or not entry.strip():
@@ -1035,7 +1176,7 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
1035
  mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
1036
  mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
1037
 
1038
- # Settings tab (theme)
1039
  with gr.TabItem("Settings"):
1040
  with gr.Row():
1041
  with gr.Column():
@@ -1048,57 +1189,32 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
1048
  gr.HTML("""
1049
  <div style="display:flex;align-items:center;gap:12px;">
1050
  <button id="wt_theme_btn" style="display:flex;align-items:center;gap:8px;padding:8px 10px;border-radius:8px;border:1px solid rgba(0,0,0,0.06);background:var(--card);cursor:pointer;">
1051
- <span id="wt_theme_icon" style="display:inline-flex;width:18px;height:18px;align-items:center;justify-content:center;"></span>
1052
  <span id="wt_theme_label" style="font-weight:600;">Toggle Theme</span>
1053
  </button>
1054
- <div style="color:var(--muted);font-size:13px;">Theme preference saved in browser · <span id="wt_theme_hint">auto</span></div>
1055
  </div>
1056
  <script>
1057
  (function(){
1058
  try {
1059
  const root = document.documentElement;
1060
  const btn = document.getElementById('wt_theme_btn');
1061
- const icon = document.getElementById('wt_theme_icon');
1062
- const hint = document.getElementById('wt_theme_hint');
1063
-
1064
- function setIconFor(theme) {
1065
- if (!icon) return;
1066
- if (theme === 'dark') {
1067
- icon.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M21 12.79A9 9 0 1111.21 3 7 7 0 0021 12.79z" fill="currentColor"/></svg>';
1068
- } else {
1069
- icon.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M12 4V2M12 22v-2M4.2 4.2L2.8 2.8M21.2 21.2l-1.4-1.4M4 12H2m20 0h-2M4.2 19.8L2.8 21.2M21.2 2.8L19.8 4.2" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/><circle cx="12" cy="12" r="3" fill="currentColor"/></svg>';
1070
- }
1071
- }
1072
-
1073
  var saved = null;
1074
  try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
1075
  var effective = null;
1076
  if (saved === 'dark' || saved === 'light') {
1077
  effective = saved;
1078
- hint.textContent = 'saved';
1079
- } else if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
1080
- effective = 'dark';
1081
- hint.textContent = 'OS-prefer';
1082
  } else {
1083
- effective = 'light';
1084
- hint.textContent = 'OS-prefer';
1085
  }
1086
  root.setAttribute('data-theme', effective);
1087
- setIconFor(effective);
1088
-
1089
  btn.addEventListener('click', function(){
1090
  try {
1091
  const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
1092
  root.setAttribute('data-theme', cur);
1093
- try { localStorage.setItem('wt_theme', cur); hint.textContent = 'saved'; } catch(e){ hint.textContent = 'saved'; }
1094
- setIconFor(cur);
1095
- } catch(e){
1096
- console.error(e);
1097
- }
1098
  });
1099
- } catch(e){
1100
- console.warn('theme toggle init failed', e);
1101
- }
1102
  })();
1103
  </script>
1104
  """)
 
1
  # app.py
2
  # Whisper Transcriber — Gradio 3.x compatible full file
3
+ # Features: chunking, experimental parallel chunk transcription (CPU-only),
4
+ # memory corrections, two-pass refine, per-run ZIP extraction & selection,
5
+ # batch streaming with progress and per-file DOCX ZIP, in-memory buffer cache,
6
+ # dark/light theme toggle (dark default).
7
  #
8
  # Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed.
 
9
 
10
  import os
11
  import sys
 
20
  from uuid import uuid4
21
  from pathlib import Path
22
  from multiprocessing import get_context
23
+ from collections import OrderedDict
24
+ import hashlib
25
+ import io
26
+ import zipfile
27
  from typing import Tuple, List
28
 
29
  # Force unbuffered prints for logs
 
57
  MODEL_CACHE = {}
58
  EXTRACT_MAP = {} # friendly_name -> absolute path
59
 
60
+ # Buffer cache configuration (in-memory LRU)
61
+ BUFFER_CACHE_MAX = 200 # tune to limit memory
62
+ BUFFER_CACHE = OrderedDict()
63
+
64
  # ---------- Worker-global for multiprocessing ----------
65
+ WORKER_MODEL = None # loaded in worker processes
 
66
 
67
  def worker_init(model_name: str, device: str):
68
  """
 
511
  pass
512
  raise
513
 
514
+ # ---------- Single-file transcription helper ----------
515
  def transcribe_single_file(
516
  path,
517
  model_name="small",
 
523
  refine_model=None,
524
  refine_threshold=-1.0,
525
  ):
 
526
  logs = []
527
  try:
528
  if not path:
 
561
  pass
562
  return text, srt_path, "\n".join(logs)
563
 
564
+ # If use_two_pass was requested, we keep a simple fallback (advanced two-pass handled elsewhere)
565
+ return "", None, "Two-pass not implemented in this helper."
566
  except Exception as e:
567
  tb = traceback.format_exc()
568
  return "", None, f"Transcription error: {e}\n{tb}"
569
 
570
# ---------- Buffer cache helpers ----------
def make_cache_key(file_path: str, model_name: str, device: str, mem_on: bool, two_pass: bool, fast_model: str, refine_threshold: float):
    """Build a deterministic cache key from file contents plus run settings.

    The file is hashed with SHA-256 (read in 8 KiB blocks) so the key tracks
    content rather than location; if the file cannot be read, the path string
    itself is used as a weaker fallback identifier.
    """
    digest = hashlib.sha256()
    try:
        with open(str(file_path), "rb") as fh:
            while True:
                block = fh.read(8192)
                if not block:
                    break
                digest.update(block)
        file_hash = digest.hexdigest()
    except Exception:
        # Unreadable/missing file: identify it by path only.
        file_hash = f"path:{str(file_path)}"
    return (
        f"{file_hash}|model={model_name}|dev={device}|mem={int(mem_on)}"
        f"|two={int(two_pass)}|fast={fast_model}|th={refine_threshold}"
    )
582
+
583
def cache_put(key: str, value: dict):
    """Insert or refresh *key* in the in-memory LRU cache.

    Re-insertion moves the key to the most-recently-used end; the cache is
    then trimmed back to BUFFER_CACHE_MAX entries (oldest evicted first).
    Caching is strictly best-effort: any failure is silently ignored so it
    can never break a transcription run.
    """
    try:
        BUFFER_CACHE.pop(key, None)  # drop stale position, if any
        BUFFER_CACHE[key] = value
        while len(BUFFER_CACHE) > BUFFER_CACHE_MAX:
            BUFFER_CACHE.popitem(last=False)  # evict least-recently-used
    except Exception:
        pass
592
+
593
def cache_get(key: str):
    """Look up *key* in the in-memory LRU cache.

    On a hit the entry is re-inserted to refresh its most-recently-used
    position. Returns the cached dict, or None on a miss or on any cache
    error (lookups are best-effort, like cache_put).
    """
    try:
        hit = BUFFER_CACHE.get(key)
        if hit:
            # Move the entry to the MRU end of the OrderedDict.
            BUFFER_CACHE.pop(key)
            BUFFER_CACHE[key] = hit
        return hit
    except Exception:
        return None
602
+
603
def make_docx_bytes(text: str):
    """Render *text* as a single-paragraph DOCX and return the raw bytes.

    Returns None if document creation or serialization fails (e.g. the
    python-docx Document class is unavailable).
    """
    try:
        doc = Document()
        doc.add_paragraph(text)
        buf = io.BytesIO()
        doc.save(buf)
        return buf.getvalue()
    except Exception:
        return None
613
+
614
def make_zip_bytes(files_dict: dict):
    """Pack a {filename: str | bytes | None} mapping into an in-memory ZIP.

    Text payloads are UTF-8 encoded; falsy payloads (None, empty) become
    zero-length archive members. Returns the ZIP archive as bytes.
    """
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for member_name, payload in files_dict.items():
            if isinstance(payload, str):
                payload = payload.encode("utf-8")
            archive.writestr(member_name, payload or b"")
    return buf.getvalue()
623
+
624
# ---------- Batch streaming transcription (new) ----------
def batch_transcribe_stream(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
    """Transcribe a batch of files, streaming progress as a generator.

    Each yield is a 6-tuple consumed by the Gradio batch tab:
        (combined_transcript_text,
         joined_log_text,
         merged_docx_path_or_None,     # only set on the final yield
         first_srt_path_or_None,
         progress_percent_int,
         per_file_docx_zip_path_or_None)  # only set on the final yield

    Inputs come from two places: `friendly_selected` keys resolved through
    the module-level EXTRACT_MAP (ZIP-extraction results), plus directly
    `uploaded_files`. Results are cached per (file content, settings) key via
    cache_get/cache_put, so re-runs with identical inputs are served from
    memory. NOTE(review): transcribe_single_file / save_as_word are defined
    elsewhere in this file; their exact contracts are assumed here.
    """
    logs_lines = []
    transcripts = []
    per_file_docx = {}  # docx filename -> docx bytes, for the final ZIP
    srt_files = []
    paths = []

    # Resolve ZIP-extracted selections through the friendly-name map.
    if friendly_selected:
        for key in friendly_selected:
            p = EXTRACT_MAP.get(key)
            if p:
                paths.append(p)
            else:
                logs_lines.append(f"[WARN] Selected not found in extract map: {key}")

    # Direct uploads may be a single file object or a list of them.
    if uploaded_files:
        if isinstance(uploaded_files, (list, tuple)):
            for f in uploaded_files:
                paths.append(str(f))
        else:
            paths.append(str(uploaded_files))

    if not paths:
        # Nothing to do: emit one terminal yield so the UI still updates.
        logs_lines.append("No files selected or uploaded.")
        yield "", "\n".join(logs_lines), None, None, 100, None
        return

    total = len(paths)
    logs_lines.append(f"Batch: {total} file(s) to process.")
    yield "", "\n".join(logs_lines), None, None, 2, None

    for idx, p in enumerate(paths, start=1):
        try:
            logs_lines.append(f"[{idx}/{total}] Checking cache for: {os.path.basename(p)}")
            # Progress is mapped onto the 5..85 range across files.
            yield "\n\n".join(transcripts), "\n".join(logs_lines), None, None, int(5 + (idx - 1) * 80 / total), None

            cache_key = make_cache_key(p, model_name, device_name, enable_mem, use_two_pass, fast_model, refine_threshold)
            cached = cache_get(cache_key)
            if cached:
                # Cache hit: reuse text/DOCX/SRT bytes without re-transcribing.
                logs_lines.append(f"[{idx}/{total}] Cache hit: returning cached transcription.")
                text = cached.get("text", "")
                transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
                docx_b = cached.get("docx_bytes")
                if docx_b:
                    fname = f"{os.path.splitext(os.path.basename(p))[0]}.docx"
                    per_file_docx[fname] = docx_b
                srt_b = cached.get("srt_bytes")
                if srt_b:
                    # Re-materialize the SRT to disk so Gradio can serve it.
                    srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
                    with open(srt_fp, "wb") as fh:
                        fh.write(srt_b)
                    srt_files.append(srt_fp)
                yield "\n\n".join(transcripts), "\n".join(logs_lines), None, (srt_files[0] if srt_files else None), int(5 + idx * 80 / total), None
                continue

            logs_lines.append(f"[{idx}/{total}] Transcribing: {p}")
            yield "\n\n".join(transcripts), "\n".join(logs_lines), None, None, int(5 + (idx - 1) * 80 / total), None

            # Full transcription for this file (non-streaming helper).
            text, srt_path, lg = transcribe_single_file(
                p,
                model_name=model_name,
                device_choice=device_name,
                enable_memory=enable_mem,
                generate_srt=generate_srt,
                use_two_pass=use_two_pass,
                fast_model=fast_model,
                refine_model=model_name,
                refine_threshold=refine_threshold,
            )
            logs_lines.append(lg or "")
            if not text:
                logs_lines.append(f"[{idx}/{total}] No transcript returned or error for {p}.")
            transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")

            # Per-file DOCX bytes feed the downloadable ZIP built at the end.
            docx_b = make_docx_bytes(text)
            if docx_b:
                fname = f"{os.path.splitext(os.path.basename(p))[0]}.docx"
                per_file_docx[fname] = docx_b

            srt_b = None
            if srt_path and os.path.exists(srt_path):
                with open(srt_path, "rb") as fh:
                    srt_b = fh.read()
                srt_files.append(srt_path)

            # Store everything for this (file, settings) combination.
            cache_put(cache_key, {"text": text, "docx_bytes": docx_b, "srt_bytes": srt_b})

            yield "\n\n".join(transcripts), "\n".join(logs_lines), None, (srt_files[0] if srt_files else None), int(5 + idx * 80 / total), None

        except Exception as e:
            # One failed file must not abort the whole batch: log and move on.
            logs_lines.append(f"[{idx}/{total}] Error processing {p}: {e}\n{traceback.format_exc()}")
            yield "\n\n".join(transcripts), "\n".join(logs_lines), None, None, int(5 + idx * 80 / total), None
            continue

    combined = "\n\n".join(transcripts)
    merged_docx_path = None
    if merge_flag:
        try:
            merged_docx_path = save_as_word(combined)
            logs_lines.append(f"Merged DOCX written: {merged_docx_path}")
        except Exception as e:
            logs_lines.append(f"Could not write merged DOCX: {e}")

    # Bundle all per-file DOCX bytes into one downloadable ZIP (best-effort).
    per_files_zip_path = None
    if per_file_docx:
        try:
            zip_bytes = make_zip_bytes(per_file_docx)
            zip_fp = os.path.join(tempfile.gettempdir(), f"batch_docx_{uuid4().hex}.zip")
            with open(zip_fp, "wb") as zf:
                zf.write(zip_bytes)
            per_files_zip_path = zip_fp
            logs_lines.append(f"Per-file DOCX ZIP created: {per_files_zip_path}")
        except Exception as e:
            logs_lines.append(f"Failed to create per-file DOCX ZIP: {e}")

    # Final yield carries the merged DOCX and the per-file ZIP, at 100%.
    yield combined, "\n".join(logs_lines), merged_docx_path, (srt_files[0] if srt_files else None), 100, per_files_zip_path
741
 
742
+ # ---------- Build Gradio UI ----------
743
  print("DEBUG: building Gradio UI", flush=True)
744
  available_choices, default_choice = safe_model_choices(prefer_default="small")
745
 
 
771
  """
772
 
773
  with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
774
+ # Theme initializer: default to dark unless user has saved a preference
775
  gr.HTML("""
776
  <script>
777
  (function() {
 
781
  var chosen = null;
782
  if (saved === 'dark' || saved === 'light') {
783
  chosen = saved;
 
 
784
  } else {
785
+ // default to dark unless user explicitly chose otherwise earlier
786
+ chosen = 'dark';
787
  }
788
  document.documentElement.setAttribute('data-theme', chosen);
 
 
 
 
 
 
 
789
  } catch (e) { console.warn('theme init failed', e); }
790
  })();
791
  </script>
792
  """)
793
 
 
794
  with gr.Row():
795
  with gr.Column(scale=0):
796
  gr.HTML("<div style='width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,#4f46e5,#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px;'>WT</div>")
797
  with gr.Column():
798
  gr.Markdown("<h3 style='margin:0'>Whisper Transcriber (Gradio 3.x)</h3>")
799
+ gr.Markdown("<div class='small-note'>Chunked streaming, experimental CPU parallel, per-run ZIP extraction, memory corrections, SRT export, dark theme default</div>")
800
 
801
  with gr.Tabs():
802
  # Single audio
 
809
  device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
810
  mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
811
  srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
 
 
812
  chunk_size_input = gr.Number(value=30, label="Chunk size (seconds)", precision=0)
813
  enable_chunking = gr.Checkbox(label="Enable chunking (recommended for long files)", value=True)
 
814
  parallel_checkbox = gr.Checkbox(label="Enable experimental parallel chunk transcription (CPU only)", value=False)
815
  parallel_workers = gr.Slider(minimum=1, maximum=max(1, os.cpu_count() or 4), value=2, step=1, label="Parallel workers (processes)")
816
  use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
817
+ fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
818
  refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
819
  transcribe_btn = gr.Button("Transcribe", variant="primary")
820
  with gr.Column(scale=1):
 
824
  srt_download = gr.File(label="SRT (if generated)")
825
  single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
826
 
 
827
  def _single_generator(audio_file, model_name, device, mem_on, srt_on, chunk_size_sec, chunking_enabled, parallel_enabled, workers, use_two_pass_flag, fast_model, refine_thresh):
828
  yield 0, "", None, "Starting..."
829
  try:
 
837
  wav = convert_to_wav_if_needed(path)
838
  yield 8, "", None, f"Converted to WAV: {os.path.basename(wav)}"
839
 
 
840
  duration = None
841
  try:
842
  p = subprocess.run(["ffprobe","-v","error","-show_entries","format=duration","-of","default=noprint_wrappers=1:nokey=1", wav], capture_output=True, text=True, timeout=8)
 
851
  except Exception:
852
  duration = None
853
 
 
854
  if chunking_enabled and (duration and duration > chunk_size_sec * 1.5):
855
  num_chunks = max(1, int((duration + chunk_size_sec - 1) // chunk_size_sec))
856
  chunk_ranges = []
 
865
 
866
  yield 10, "", None, f"Preparing transcription ({len(chunk_ranges)} chunk(s))..."
867
 
 
868
  model = get_whisper_model(model_name, device=None if device == "auto" else device)
869
  yield 15, "", None, f"Model loaded: {model_name}"
870
 
871
  overall_parts = []
872
  total_chunks = len(chunk_ranges)
873
 
 
874
  parallel_used = False
875
  if parallel_enabled and chunking_enabled and total_chunks > 1:
876
  if device != "cpu" and device != "auto":
 
877
  yield 15, "", None, "Parallel mode requested but device is not 'cpu'. Falling back to serial chunking."
878
  parallel_used = False
879
  else:
 
880
  try:
881
  ctx = get_context("spawn")
882
  worker_count = max(1, int(workers))
883
  yield 18, "", None, f"Starting parallel pool with {worker_count} workers (fast_model={fast_model})..."
884
  pool = ctx.Pool(processes=worker_count, initializer=worker_init, initargs=(fast_model, "cpu"))
 
885
  chunk_paths = []
886
  temp_chunk_files = []
887
  for (st, ed) in chunk_ranges:
 
891
  cw = trim_audio_segment(wav, st, ed)
892
  chunk_paths.append(cw)
893
  temp_chunk_files.append(cw)
 
894
  results = pool.map(worker_transcribe_chunk, chunk_paths)
895
  pool.close()
896
  pool.join()
 
897
  for idx, (txt, err) in enumerate(results, start=1):
898
  if err:
899
  yield int(20 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} worker error: {err}"
 
904
  overall_parts.append(txt)
905
  prog = int(20 + idx * 70 / max(1, total_chunks))
906
  yield prog, "\n\n".join(overall_parts), None, f"Completed chunk {idx}/{total_chunks} (parallel)."
 
907
  for tfile in temp_chunk_files:
908
  try:
909
  if os.path.exists(tfile):
 
916
  parallel_used = False
917
 
918
  if not parallel_used:
 
919
  for idx, (st, ed) in enumerate(chunk_ranges, start=1):
920
  try:
921
  if ed is None:
 
927
 
928
  yield int(15 + (idx - 1) * 70 / max(1, total_chunks)), "", None, f"Transcribing chunk {idx}/{total_chunks} ({note})..."
929
 
930
+ result = model.transcribe(chunk_wav)
 
 
931
  chunk_text = result.get("text", "").strip()
932
 
933
  if mem_on:
 
947
  except Exception as e:
948
  yield int(15 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} failed: {e}\n{traceback.format_exc()}"
949
 
 
950
  final_text = "\n\n".join([p for p in overall_parts if p])
951
  if mem_on:
952
  try:
 
954
  except Exception:
955
  pass
956
 
 
957
  srt_path = None
958
  if srt_on:
959
  try:
 
969
 
970
  yield 98, final_text, srt_path, "Transcription complete."
971
 
 
972
  try:
973
  if os.path.exists(wav) and not path.lower().endswith(".wav"):
974
  os.unlink(wav)
 
986
  outputs=[progress_num, transcript_out, srt_download, single_logs],
987
  )
988
 
989
+ # Batch tab
990
  with gr.TabItem("Batch Transcribe"):
991
  with gr.Row():
992
  with gr.Column(scale=1):
 
1012
  batch_logs = gr.Textbox(label="Logs", lines=10, interactive=False)
1013
  batch_doc_download = gr.File(label="Merged DOCX (if created)")
1014
  batch_srt_download = gr.File(label="First SRT (if any)")
1015
+ batch_progress = gr.Slider(minimum=0, maximum=100, value=0, step=1, label="Batch Progress (%)", interactive=False)
1016
+ batch_files_zip = gr.File(label="Download per-file DOCX ZIP (all files)", interactive=False)
1017
 
1018
  def _do_extract(zip_file, password):
1019
  if not zip_file:
 
1024
 
1025
  batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
1026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  batch_run_btn.click(
1028
+ fn=batch_transcribe_stream,
1029
  inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_threshold],
1030
+ outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download, batch_progress, batch_files_zip],
1031
  )
1032
 
1033
+ # Memory tab
1034
  with gr.TabItem("Memory"):
1035
  with gr.Row():
1036
  with gr.Column(scale=1):
1037
  gr.Markdown("### Correction Memory")
1038
+ # Allow multiple files upload for memory import
1039
+ mem_upload = gr.File(label="Import memory file(s) (JSON or text)", file_count="multiple", type="filepath")
1040
  mem_import_btn = gr.Button("Import Memory")
1041
  mem_text = gr.Textbox(label="Add word/phrase", placeholder="Type word or phrase")
1042
  mem_add_btn = gr.Button("Add to Memory")
 
1045
  mem_status = gr.Textbox(label="Memory status / preview", lines=12, interactive=False)
1046
 
1047
  def _import_mem(uploaded):
1048
+ """
1049
+ Accepts uploaded which may be:
1050
+ - None
1051
+ - a single file-like object / path
1052
+ - a list of file-like objects / paths
1053
+ Processes each file and merges into memory (words & phrases).
1054
+ """
1055
  if not uploaded:
1056
+ return "No file(s) provided."
1057
+
1058
+ files = []
1059
+ # Normalize uploaded to list of paths
1060
+ if isinstance(uploaded, (list, tuple)):
1061
+ for item in uploaded:
1062
+ if not item:
1063
+ continue
1064
+ path = item.name if hasattr(item, "name") else str(item)
1065
+ files.append(path)
1066
+ else:
1067
+ path = uploaded.name if hasattr(uploaded, "name") else str(uploaded)
1068
+ files.append(path)
1069
+
1070
+ total_added = 0
1071
+ merged_words = 0
1072
+ merged_phrases = 0
1073
+ errors = []
1074
+ for path in files:
1075
+ try:
1076
+ with open(path, "r", encoding="utf-8") as fh:
1077
+ raw = fh.read()
1078
+ except Exception as e:
1079
+ errors.append(f"Failed to read {path}: {e}")
1080
+ continue
1081
+
1082
  parsed = None
1083
  try:
1084
  parsed = json.loads(raw)
1085
  except Exception:
1086
  parsed = None
1087
+
1088
  if isinstance(parsed, dict):
1089
  with MEMORY_LOCK:
1090
+ pw = parsed.get("words", {})
1091
+ pp = parsed.get("phrases", {})
1092
+ for k, v in pw.items():
1093
+ try:
1094
+ memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + int(v)
1095
+ except Exception:
1096
+ memory["words"][k.lower()] = memory["words"].get(k.lower(), 0) + 1
1097
+ merged_words += 1
1098
+ for k, v in pp.items():
1099
+ try:
1100
+ memory["phrases"][k] = memory["phrases"].get(k, 0) + int(v)
1101
+ except Exception:
1102
+ memory["phrases"][k] = memory["phrases"].get(k, 0) + 1
1103
+ merged_phrases += 1
1104
  save_memory(memory)
1105
+ total_added += (len(pw) + len(pp))
1106
+ continue
1107
+
1108
+ # fallback to newline parsing
1109
  lines = [l.strip() for l in raw.splitlines() if l.strip()]
1110
+ added_here = 0
1111
  with MEMORY_LOCK:
1112
  for line in lines:
1113
  if "," in line:
1114
+ parts = [p.strip() for p in line.split(",", 1)]
1115
+ key = parts[0].lower()
1116
  try:
1117
+ cnt = int(parts[1])
1118
+ except Exception:
1119
  cnt = 1
1120
+ memory["words"][key] = memory["words"].get(key, 0) + cnt
1121
+ merged_words += 1
1122
  else:
1123
+ # short lines -> words, longer -> phrase
1124
+ if len(line.split()) <= 3:
1125
+ memory["words"][line.lower()] = memory["words"].get(line.lower(), 0) + 1
1126
+ merged_words += 1
1127
+ else:
1128
+ memory["phrases"][line] = memory["phrases"].get(line, 0) + 1
1129
+ merged_phrases += 1
1130
+ added_here += 1
1131
  save_memory(memory)
1132
+ total_added += added_here
1133
+
1134
+ msg_parts = [f"Imported {total_added} entries ({merged_words} words, {merged_phrases} phrases)."]
1135
+ if errors:
1136
+ msg_parts.append("Errors:")
1137
+ msg_parts.extend(errors)
1138
+ return "\n".join(msg_parts)
1139
 
1140
  def _add_mem(entry):
1141
  if not entry or not entry.strip():
 
1176
  mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
1177
  mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
1178
 
1179
+ # Settings tab
1180
  with gr.TabItem("Settings"):
1181
  with gr.Row():
1182
  with gr.Column():
 
1189
  gr.HTML("""
1190
  <div style="display:flex;align-items:center;gap:12px;">
1191
  <button id="wt_theme_btn" style="display:flex;align-items:center;gap:8px;padding:8px 10px;border-radius:8px;border:1px solid rgba(0,0,0,0.06);background:var(--card);cursor:pointer;">
 
1192
  <span id="wt_theme_label" style="font-weight:600;">Toggle Theme</span>
1193
  </button>
1194
+ <div style="color:var(--muted);font-size:13px;">Theme preference saved in browser</div>
1195
  </div>
1196
  <script>
1197
  (function(){
1198
  try {
1199
  const root = document.documentElement;
1200
  const btn = document.getElementById('wt_theme_btn');
 
 
 
 
 
 
 
 
 
 
 
 
1201
  var saved = null;
1202
  try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
1203
  var effective = null;
1204
  if (saved === 'dark' || saved === 'light') {
1205
  effective = saved;
 
 
 
 
1206
  } else {
1207
+ effective = 'dark';
 
1208
  }
1209
  root.setAttribute('data-theme', effective);
 
 
1210
  btn.addEventListener('click', function(){
1211
  try {
1212
  const cur = root.getAttribute('data-theme') === 'dark' ? 'light' : 'dark';
1213
  root.setAttribute('data-theme', cur);
1214
+ try { localStorage.setItem('wt_theme', cur); } catch(e){}
1215
+ } catch(e){ console.error(e); }
 
 
 
1216
  });
1217
+ } catch(e){}
 
 
1218
  })();
1219
  </script>
1220
  """)