moderntranscribe

Sleeping

App Files Files Community

staraks committed on Nov 21, 2025

Commit

bf79b27

verified ·

1 Parent(s): cf38d7d

Update app.py

Browse files

Files changed (1) hide show

app.py +292 -44

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # app.py
-# Whisper Transcriber — Full improved app.py with Dark/Light toggle
 # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
 import os
@@ -12,7 +12,9 @@ import traceback
 import threading
 import re
 from difflib import get_close_matches
 from pathlib import Path
 # Force unbuffered prints for logs
 os.environ["PYTHONUNBUFFERED"] = "1"
@@ -386,19 +388,19 @@ def segments_to_srt(segments):
     return "\n".join(lines)
-# ---------- ZIP extraction + mapping for UI ----------
 def extract_zip_and_map(zip_path, zip_password=None):
     global EXTRACT_MAP
     EXTRACT_MAP = {}
-    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
     try:
-        if os.path.exists(temp_extract_dir):
-            try:
-                shutil.rmtree(temp_extract_dir)
-            except Exception:
-                pass
         os.makedirs(temp_extract_dir, exist_ok=True)
-        logs = []
         with pyzipper.ZipFile(zip_path, "r") as zf:
             if zip_password:
                 try:
@@ -442,53 +444,251 @@ def extract_zip_and_map(zip_path, zip_password=None):
         return friendly, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
         return [], f"Extraction failed: {e}"
-# ---------- Single-file transcribe (with SRT option) ----------
-def transcribe_single_file(path, model_name="small", device_choice="auto", enable_memory=False, generate_srt=False):
     logs = []
     try:
         if not path:
-            return None, "", "No file provided."
         p = path.name if hasattr(path, "name") else str(path)
         device = None if device_choice == "auto" else device_choice
-        model = get_whisper_model(model_name, device=device)
-        logs.append(f"Loaded model: {model_name}")
         wav = convert_to_wav_if_needed(p)
         logs.append(f"Converted to WAV: {os.path.basename(wav)}")
-        result = model.transcribe(wav)
-        text = result.get("text", "").strip()
         if enable_memory:
-            text = memory_correct_text(text)
-        text = postprocess_transcript(text)
         srt_path = None
-        if generate_srt and result.get("segments"):
-            srt_text = segments_to_srt(result["segments"])
-            srt_fp = os.path.join(tempfile.gettempdir(), f"{os.path.splitext(os.path.basename(p))[0]}.srt")
             with open(srt_fp, "w", encoding="utf-8") as fh:
                 fh.write(srt_text)
             srt_path = srt_fp
             logs.append(f"SRT generated: {srt_path}")
-        if enable_memory:
-            try:
-                update_memory_with_transcript(text)
-                logs.append("Memory updated.")
-            except Exception:
-                pass
         if wav and os.path.exists(wav) and wav != p:
             try:
                 os.unlink(wav)
             except Exception:
                 pass
-        return text, srt_path, "\n".join(logs)
     except Exception as e:
         tb = traceback.format_exc()
         return "", None, f"Transcription error: {e}\n{tb}"
-# ---------- Batch transcribe (maps friendly names to real paths) ----------
-def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt):
     logs = []
     transcripts = []
     srt_files = []
@@ -513,7 +713,17 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
     total = len(paths)
     for idx, p in enumerate(paths, start=1):
         logs.append(f"[{idx}/{total}] Processing: {p}")
-        text, srt_path, lg = transcribe_single_file(p, model_name=model_name, device_choice=device_name, enable_memory=enable_mem, generate_srt=generate_srt)
         logs.append(lg)
         transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
         if srt_path:
@@ -561,7 +771,7 @@ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui,
 .small-note { color:var(--muted); font-size:12px;}
 """
-with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
     # apply saved theme early
     gr.HTML("""
     <script>
@@ -585,7 +795,7 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
             gr.HTML("<div class='app-icon'>WT</div>")
         with gr.Column():
             gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
-            gr.Markdown("<div class='small-note'>Per-file selection after unzip, SRT export, model availability checks, dark/light toggle.</div>")
     with gr.Tabs():
         # Single Audio Tab
@@ -601,6 +811,10 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
                         with gr.Row():
                             mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
                             srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
                         transcribe_btn = gr.Button("Transcribe", variant="primary")
                 with gr.Column(scale=1):
                     with gr.Group(elem_classes="card"):
@@ -610,15 +824,29 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
                         srt_download = gr.File(label="SRT (if generated / available)")
                         single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
-            def _single_action(audio_file, model_name, device, mem_on, srt_on):
                 if not audio_file:
                     return None, "", None, "No audio file provided."
                 path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
-                text, srt_path, logs = transcribe_single_file(path, model_name=model_name, device_choice=device, enable_memory=mem_on, generate_srt=srt_on)
                 preview = audio_file
                 return preview, text, srt_path, logs
-            transcribe_btn.click(fn=_single_action, inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle], outputs=[audio_preview, transcript_out, srt_download, single_logs])
         # Batch Tab
         with gr.TabItem("Batch Transcribe"):
@@ -638,6 +866,10 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
                         batch_merge = gr.Checkbox(label="Merge transcripts to DOCX", value=True)
                         batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
                         batch_srt = gr.Checkbox(label="Generate SRT(s) if available", value=False)
                         batch_run_btn = gr.Button("Start Batch Transcription", variant="primary")
                 with gr.Column(scale=1):
                     with gr.Group(elem_classes="card"):
@@ -649,18 +881,34 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
             def _do_extract(zip_file, password):
                 if not zip_file:
-                    return [], "No ZIP provided."
                 zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
                 friendly, logs = extract_zip_and_map(zip_path, password)
-                return friendly, logs
             batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
-            def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag):
-                combined, logs, out_doc, srt_path = batch_transcribe(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag)
                 return combined, logs, out_doc, srt_path
-            batch_run_btn.click(fn=_do_batch, inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt], outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download])
         # Memory Tab
         with gr.TabItem("Memory"):
@@ -761,13 +1009,13 @@ with gr.Blocks(title="Whisper Transcriber (dark/light)", css=CSS) as demo:
                     with gr.Group(elem_classes="card"):
                         gr.Markdown("### Runtime & tips")
                         gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
-                        gr.Markdown("- Extraction writes to system temp `extracted_audio`. Re-extracting overwrites it.")
-                        gr.Markdown("- Provide `fine_tune.py` if you plan to use the Fine-tune workflow.")
                 with gr.Column():
                     with gr.Group(elem_classes="card"):
                         gr.Markdown("### Theme")
                         theme_toggle = gr.Button("Toggle Dark / Light Theme")
-                        theme_note = gr.Markdown("Theme preference is saved in your browser (localStorage).")
                         gr.Markdown("### Diagnostics")
                         diag_btn = gr.Button("Show memory summary")
                         diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)

 # app.py
+# Whisper Transcriber — Fixed: per-run extract dirs + CheckboxGroup update + misc imports
 # Requirements: gradio, whisper, pydub, pyzipper, python-docx, ffmpeg
 import os
 import threading
 import re
 from difflib import get_close_matches
+from uuid import uuid4
 from pathlib import Path
+from difflib import get_close_matches
 # Force unbuffered prints for logs
 os.environ["PYTHONUNBUFFERED"] = "1"
     return "\n".join(lines)
+# ---------- ZIP extraction + mapping for UI (per-run temp dir) ----------
 def extract_zip_and_map(zip_path, zip_password=None):
+    """
+    Extract to a unique per-run temp directory and populate EXTRACT_MAP with absolute paths.
+    Returns (friendly_list, logs)
+    """
     global EXTRACT_MAP
     EXTRACT_MAP = {}
+    run_id = uuid4().hex
+    temp_extract_dir = os.path.join(tempfile.gettempdir(), f"extracted_audio_{run_id}")
+    logs = []
     try:
         os.makedirs(temp_extract_dir, exist_ok=True)
         with pyzipper.ZipFile(zip_path, "r") as zf:
             if zip_password:
                 try:
         return friendly, "\n".join(logs)
     except Exception as e:
         traceback.print_exc()
+        # on failure, attempt cleanup
+        try:
+            if os.path.exists(temp_extract_dir):
+                shutil.rmtree(temp_extract_dir)
+        except Exception:
+            pass
         return [], f"Extraction failed: {e}"
+# ---------- Audio trimming helper for two-pass ----------
def trim_audio_segment(src_path, start_sec, end_sec):
    """
    Cut the [start_sec, end_sec] window out of an audio file with ffmpeg.

    The clip is written to a fresh temporary ``.wav`` file, resampled to
    16 kHz mono (``-ar 16000 -ac 1``). The caller owns the returned file and
    is responsible for deleting it.

    Args:
        src_path: Source audio path (converted with ``str()``).
        start_sec: Window start, passed to ffmpeg ``-ss``.
        end_sec: Window end, passed to ffmpeg ``-to``.

    Returns:
        Path of the temporary wav file holding the clip.

    Raises:
        Exception: if ffmpeg exits non-zero, produces no file, or produces a
            file smaller than ``MIN_WAV_SIZE``. Errors from ``subprocess.run``
            (e.g. the 30 s timeout) propagate unchanged. The temporary file
            is removed before any exception leaves this function.
    """
    source = str(src_path)
    # Reserve a unique output name; ffmpeg overwrites the placeholder (-y).
    placeholder = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    placeholder.close()
    clip_path = placeholder.name
    try:
        ffmpeg_args = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
        ffmpeg_args += ["-ss", str(start_sec), "-to", str(end_sec)]
        ffmpeg_args += ["-i", source]
        ffmpeg_args += ["-ar", "16000", "-ac", "1", clip_path]
        completed = subprocess.run(ffmpeg_args, capture_output=True, timeout=30, text=True)
        clip_usable = (
            completed.returncode == 0
            and os.path.exists(clip_path)
            and os.path.getsize(clip_path) >= MIN_WAV_SIZE
        )
        if not clip_usable:
            raise Exception(f"ffmpeg trim failed: {completed.stderr or completed.stdout}")
        return clip_path
    except Exception:
        # Single cleanup point for every failure mode; removal is best-effort.
        try:
            if os.path.exists(clip_path):
                os.unlink(clip_path)
        except Exception:
            pass
        raise
+# ---------- Transcribe single file (supports two-pass) ----------
def _unlink_quietly(file_path):
    """Best-effort removal of a temporary file; a falsy path is a no-op."""
    try:
        if file_path and os.path.exists(file_path):
            os.unlink(file_path)
    except Exception:
        # Cleanup must never mask the real result or error of a transcription.
        pass


def _save_srt(segments, source_path, suffix=""):
    """Write `segments` as `<source stem><suffix>.srt` in the system temp dir and return its path."""
    stem = os.path.splitext(os.path.basename(source_path))[0]
    srt_fp = os.path.join(tempfile.gettempdir(), f"{stem}{suffix}.srt")
    with open(srt_fp, "w", encoding="utf-8") as fh:
        fh.write(segments_to_srt(segments))
    return srt_fp


def _remember_transcript(text, logs):
    """Feed `text` to the memory store; a failure is logged, never raised."""
    try:
        update_memory_with_transcript(text)
        logs.append("Memory updated.")
    except Exception as e:
        logs.append(f"Memory update failed: {e}")


def transcribe_single_file(
    path,
    model_name="small",
    device_choice="auto",
    enable_memory=False,
    generate_srt=False,
    use_two_pass=False,
    fast_model="small",
    refine_model=None,
    refine_threshold=-1.0,
):
    """
    Transcribe one audio file, optionally with a fast pass plus selective refinement.

    Single pass (``use_two_pass`` false): transcribe with ``model_name``,
    apply memory corrections if enabled, post-process, optionally write an SRT.

    Two pass (``use_two_pass`` true):
      1) transcribe with ``fast_model``
      2) memory-correct every segment
      3) re-transcribe, with ``refine_model``, each segment whose
         ``avg_logprob`` is below ``refine_threshold`` (on a trimmed clip)
      4) join the segments, memory-correct, post-process, optionally write
         ``<stem>_two_pass.srt``

    Args:
        path: File path, or an object exposing the path as ``.name``.
        model_name: Model for the single pass; also the default refine model.
        device_choice: ``"auto"`` lets the model loader pick (passes ``None``).
        enable_memory: Apply memory corrections and update memory afterwards.
        generate_srt: Write an SRT file into the system temp directory.
        use_two_pass: Enable the fast-then-refine flow.
        fast_model: Model used for the first pass of the two-pass flow.
        refine_model: Model used for refinement; ``None`` means ``model_name``.
        refine_threshold: Segments with ``avg_logprob`` strictly below this
            value are refined; a non-numeric threshold refines nothing.

    Returns:
        ``(text, srt_path, logs)``. ``srt_path`` is ``None`` when no SRT was
        written. With no ``path`` the result is
        ``(None, None, "No file provided.")``; on any error it is
        ``("", None, "Transcription error: ...")`` including the traceback.
    """
    logs = []
    p = None
    wav = None
    try:
        if not path:
            return None, None, "No file provided."
        p = path.name if hasattr(path, "name") else str(path)
        device = None if device_choice == "auto" else device_choice

        # ---------------- Single-pass flow ----------------
        if not use_two_pass:
            model = get_whisper_model(model_name, device=device)
            logs.append(f"Loaded model: {model_name}")
            wav = convert_to_wav_if_needed(p)
            logs.append(f"Converted to WAV: {os.path.basename(wav)}")
            result = model.transcribe(wav)
            text = result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
            text = postprocess_transcript(text)
            srt_path = None
            if generate_srt and result.get("segments"):
                srt_path = _save_srt(result["segments"], p)
                logs.append(f"SRT generated: {srt_path}")
            if enable_memory:
                _remember_transcript(text, logs)
            return text, srt_path, "\n".join(logs)

        # ---------------- Two-pass flow ----------------
        if refine_model is None:
            refine_model = model_name
        logs.append(f"Two-pass enabled: fast_model={fast_model}, refine_model={refine_model}, threshold={refine_threshold}")

        # 1) fast pass
        fast = get_whisper_model(fast_model, device=device)
        logs.append(f"Loaded fast model: {fast_model}")
        wav = convert_to_wav_if_needed(p)
        logs.append(f"Converted to WAV: {os.path.basename(wav)}")
        fast_result = fast.transcribe(wav)
        segments = fast_result.get("segments") or []

        # Fallback: no segments -> treat the result as one block of text.
        # There is nothing to build an SRT from, so none is produced.
        if not segments:
            text = fast_result.get("text", "").strip()
            if enable_memory:
                text = memory_correct_text(text)
                _remember_transcript(text, logs)
            text = postprocess_transcript(text)
            return text, None, "\n".join(logs)

        # 2) memory-correct segments and queue the low-confidence ones
        refined_segments = []
        segments_to_refine = []
        for seg in segments:
            seg_text = seg.get("text", "").strip()
            seg_copy = dict(seg)
            seg_copy["text"] = memory_correct_text(seg_text) if enable_memory else seg_text
            refined_segments.append(seg_copy)
            avg_lp = seg.get("avg_logprob")
            if avg_lp is None:
                continue
            try:
                low_confidence = float(avg_lp) < float(refine_threshold)
            except (TypeError, ValueError):
                # Non-numeric score or threshold: leave the segment as is.
                continue
            if low_confidence:
                segments_to_refine.append(seg_copy)
        logs.append(f"Fast pass produced {len(segments)} segments; {len(segments_to_refine)} queued for refinement.")

        # 3) refine low-confidence segments
        if segments_to_refine:
            refine = get_whisper_model(refine_model, device=device)
            logs.append(f"Loaded refine model: {refine_model}")
            for seg in segments_to_refine:
                start = seg.get("start", 0.0)
                end = seg.get("end", start + seg.get("duration", 0.0))
                if end <= start:
                    continue
                seg_wav = None
                try:
                    seg_wav = trim_audio_segment(wav, start, end)
                    r_result = refine.transcribe(seg_wav)
                    new_text = r_result.get("text", "").strip()
                    if enable_memory:
                        new_text = memory_correct_text(new_text)
                    # `seg` is the very dict stored in refined_segments, so it
                    # is updated in place; no timestamp lookup is needed.
                    seg["text"] = new_text
                    if r_result.get("segments"):
                        seg["avg_logprob"] = r_result["segments"][0].get("avg_logprob", seg.get("avg_logprob"))
                except Exception as e:
                    # One bad segment keeps its fast-pass text; the rest go on.
                    logs.append(f"Refine failed for segment {start}-{end}: {e}")
                finally:
                    _unlink_quietly(seg_wav)

        # 4) recombine segments in chronological order
        ordered = sorted(refined_segments, key=lambda s: s.get("start", 0.0))
        stripped_parts = (s.get("text", "").strip() for s in ordered)
        combined_text = " ".join(part for part in stripped_parts if part)
        if enable_memory:
            combined_text = memory_correct_text(combined_text)
            _remember_transcript(combined_text, logs)
        combined_text = postprocess_transcript(combined_text)

        # 5) generate SRT if requested
        srt_path = None
        if generate_srt:
            srt_segs = [
                {"start": s.get("start", 0.0), "end": s.get("end", 0.0), "text": s.get("text", "")}
                for s in ordered
            ]
            srt_path = _save_srt(srt_segs, p, suffix="_two_pass")
            logs.append(f"SRT generated: {srt_path}")
        return combined_text, srt_path, "\n".join(logs)
    except Exception as e:
        tb = traceback.format_exc()
        return "", None, f"Transcription error: {e}\n{tb}"
    finally:
        # Remove the converted WAV on every exit path (success, early return
        # or error), but never the caller's own file.
        if wav != p:
            _unlink_quietly(wav)
+# ---------- Batch transcribe (uses transcribe_single_file's two-pass when requested) ----------
+def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
     logs = []
     transcripts = []
     srt_files = []
     total = len(paths)
     for idx, p in enumerate(paths, start=1):
         logs.append(f"[{idx}/{total}] Processing: {p}")
+        text, srt_path, lg = transcribe_single_file(
+            p,
+            model_name=model_name,
+            device_choice=device_name,
+            enable_memory=enable_mem,
+            generate_srt=generate_srt,
+            use_two_pass=use_two_pass,
+            fast_model=fast_model,
+            refine_model=model_name,
+            refine_threshold=refine_threshold,
+        )
         logs.append(lg)
         transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
         if srt_path:
 .small-note { color:var(--muted); font-size:12px;}
 """
+with gr.Blocks(title="Whisper Transcriber (dark/light + two-pass)", css=CSS) as demo:
     # apply saved theme early
     gr.HTML("""
     <script>
             gr.HTML("<div class='app-icon'>WT</div>")
         with gr.Column():
             gr.Markdown("<h3 style='margin:0'>Whisper Transcriber — improved</h3>")
+            gr.Markdown("<div class='small-note'>Two-pass speedup, per-file selection after unzip, SRT export, model availability checks, dark/light toggle.</div>")
     with gr.Tabs():
         # Single Audio Tab
                         with gr.Row():
                             mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
                             srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
+                        with gr.Row():
+                            use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
+                            fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
+                        refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob) — lower is stricter", precision=2)
                         transcribe_btn = gr.Button("Transcribe", variant="primary")
                 with gr.Column(scale=1):
                     with gr.Group(elem_classes="card"):
                         srt_download = gr.File(label="SRT (if generated / available)")
                         single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
+            def _single_action(audio_file, model_name, device, mem_on, srt_on, use_two_pass_flag, fast_model, refine_thresh):
                 if not audio_file:
                     return None, "", None, "No audio file provided."
                 path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
+                text, srt_path, logs = transcribe_single_file(
+                    path,
+                    model_name=model_name,
+                    device_choice=device,
+                    enable_memory=mem_on,
+                    generate_srt=srt_on,
+                    use_two_pass=use_two_pass_flag,
+                    fast_model=fast_model,
+                    refine_model=model_name,
+                    refine_threshold=refine_thresh,
+                )
                 preview = audio_file
                 return preview, text, srt_path, logs
+            transcribe_btn.click(
+                fn=_single_action,
+                inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, use_two_pass_single, fast_model_choice, refine_threshold_single],
+                outputs=[audio_preview, transcript_out, srt_download, single_logs],
+            )
         # Batch Tab
         with gr.TabItem("Batch Transcribe"):
                         batch_merge = gr.Checkbox(label="Merge transcripts to DOCX", value=True)
                         batch_mem = gr.Checkbox(label="Enable memory corrections", value=False)
                         batch_srt = gr.Checkbox(label="Generate SRT(s) if available", value=False)
+                        with gr.Row():
+                            batch_use_two_pass = gr.Checkbox(label="Use two-pass speedup", value=False)
+                            batch_fast_model = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
+                        batch_refine_threshold = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
                         batch_run_btn = gr.Button("Start Batch Transcription", variant="primary")
                 with gr.Column(scale=1):
                     with gr.Group(elem_classes="card"):
             def _do_extract(zip_file, password):
                 """
                 Click handler for the ZIP "extract" button.

                 Extracts the archive via extract_zip_and_map and returns
                 (CheckboxGroup update, log text). The update replaces the
                 selectable file names and clears the current selection.
                 """
                 # gr.update() is the version-portable spelling: the per-component
                 # `gr.CheckboxGroup.update(...)` classmethod no longer exists in Gradio 4+.
                 if not zip_file:
                     return gr.update(choices=[], value=[]), "No ZIP provided."
                 zip_path = zip_file.name if hasattr(zip_file, "name") else str(zip_file)
                 friendly, logs = extract_zip_and_map(zip_path, password)
                 # extract_zip_and_map rebuilds EXTRACT_MAP from scratch, so names ticked
                 # for a previous archive would no longer resolve; reset the selection.
                 return gr.update(choices=friendly, value=[]), logs
             batch_extract_btn.click(fn=_do_extract, inputs=[batch_zip, zip_password], outputs=[batch_select, batch_extract_logs])
+            def _do_batch(friendly_selected, uploaded_files, model_name, device, merge_flag, mem_flag, srt_flag, use_two_pass_flag, fast_model, refine_thresh):
+                combined, logs, out_doc, srt_path = batch_transcribe(
+                    friendly_selected,
+                    uploaded_files,
+                    model_name,
+                    device,
+                    merge_flag,
+                    mem_flag,
+                    srt_flag,
+                    use_two_pass=use_two_pass_flag,
+                    fast_model=fast_model,
+                    refine_threshold=refine_thresh,
+                )
                 return combined, logs, out_doc, srt_path
+            batch_run_btn.click(
+                fn=_do_batch,
+                inputs=[batch_select, batch_files, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_threshold],
+                outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
+            )
         # Memory Tab
         with gr.TabItem("Memory"):
                     with gr.Group(elem_classes="card"):
                         gr.Markdown("### Runtime & tips")
                         gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
+                        gr.Markdown("- Extraction writes to a per-run temp directory under system temp. Re-extracting creates a new run dir.")
+                        gr.Markdown("- Two-pass helps on long files where heavy model is costly.")
                 with gr.Column():
                     with gr.Group(elem_classes="card"):
                         gr.Markdown("### Theme")
                         theme_toggle = gr.Button("Toggle Dark / Light Theme")
+                        gr.Markdown("Theme preference is saved in your browser (localStorage).")
                         gr.Markdown("### Diagnostics")
                         diag_btn = gr.Button("Show memory summary")
                         diag_out = gr.Textbox(label="Diagnostics", lines=12, interactive=False)