moderntranscribe

Sleeping

App Files Files Community

staraks committed on Nov 24, 2025

Commit

847997b

verified ·

1 Parent(s): f22f266

Update app.py

Browse files

Files changed (1) hide show

app.py +175 -108

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 # app.py
-# Whisper Transcriber — Gradio 3.x compatible full file (chunked streaming)
-# Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed
 import os
 import sys
@@ -14,13 +18,15 @@ import re
 from difflib import get_close_matches
 from uuid import uuid4
 from pathlib import Path
 # Force unbuffered prints for logs
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
-# Third-party imports (ensure installed)
 try:
     import gradio as gr
     import whisper
@@ -46,6 +52,40 @@ FFMPEG_CANDIDATES = [
 MODEL_CACHE = {}
 EXTRACT_MAP = {}  # friendly_name -> absolute path
 # ---------- Memory & postprocessing ----------
 def load_memory():
     try:
@@ -67,7 +107,6 @@ def load_memory():
         pass
     return mem
 def save_memory(mem):
     with MEMORY_LOCK:
         try:
@@ -76,7 +115,6 @@ def save_memory(mem):
         except Exception:
             traceback.print_exc()
 memory = load_memory()
 MEDICAL_ABBREVIATIONS = {
@@ -98,7 +136,6 @@ DRUG_NORMALIZATION = {
     "amoxicillin": "Amoxicillin",
 }
 def expand_abbreviations(text):
     tokens = re.split(r"(\s+)", text)
     out = []
@@ -114,13 +151,11 @@ def expand_abbreviations(text):
             out.append(t)
     return "".join(out)
 def normalize_drugs(text):
     """Rewrite drug names found in *text* to their canonical spelling.

     Each key of ``DRUG_NORMALIZATION`` is matched as a whole word,
     case-insensitively, and replaced by the corresponding value.

     Args:
         text: The transcript text to normalize.

     Returns:
         The text with every matched drug name replaced.
     """
     for k, v in DRUG_NORMALIZATION.items():
         # Escape the key so regex metacharacters in a drug name (".", "+",
         # "(") are matched literally instead of being interpreted as a
         # pattern. A callable replacement keeps backslashes or group
         # references in the canonical name from being parsed as a template.
         pattern = rf"\b{re.escape(k)}\b"
         text = re.sub(pattern, lambda _m, canon=v: canon, text, flags=re.IGNORECASE)
     return text
 def punctuation_and_capitalization(text):
     text = text.strip()
     if not text:
@@ -136,7 +171,6 @@ def punctuation_and_capitalization(text):
             out.append(p)
     return "".join(out)
 def postprocess_transcript(text):
     if not text:
         return text
@@ -146,13 +180,11 @@ def postprocess_transcript(text):
     t = punctuation_and_capitalization(t)
     return t
 def extract_words_and_phrases(text):
     """Pull word tokens and sentence strings out of *text*.

     Words are runs of ASCII letters, digits, hyphens and apostrophes.
     Sentences are produced by splitting on whitespace that follows ".",
     "?" or "!"; each one is stripped and empty results are discarded.

     Returns:
         A ``(words, sentences)`` pair of lists.
     """
     word_tokens = list(filter(str.strip, re.findall(r"[A-Za-z0-9\-']+", text)))
     raw_pieces = re.split(r"(?<=[.?!])\s+", text)
     sentence_list = [piece for piece in map(str.strip, raw_pieces) if piece]
     return word_tokens, sentence_list
 def update_memory_with_transcript(transcript):
     global memory
     words, sentences = extract_words_and_phrases(transcript)
@@ -168,7 +200,6 @@ def update_memory_with_transcript(transcript):
         if changed:
             save_memory(memory)
 def memory_correct_text(text, min_ratio=0.85):
     if not text or (not memory.get("words") and not memory.get("phrases")):
         return text
@@ -202,7 +233,6 @@ def memory_correct_text(text, min_ratio=0.85):
             corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
     return corrected
 # ---------- Utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
@@ -212,7 +242,6 @@ def save_as_word(text, filename=None):
     doc.save(filename)
     return filename
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
@@ -239,7 +268,6 @@ def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
             pass
         return False, str(e)
 def convert_to_wav_if_needed(input_path):
     input_path = str(input_path)
     lower = input_path.lower()
@@ -322,7 +350,6 @@ def convert_to_wav_if_needed(input_path):
     raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
 # ---------- Whisper helper ----------
 def whisper_available_models():
     try:
@@ -333,10 +360,8 @@ def whisper_available_models():
         pass
     return set(["tiny", "base", "small", "medium", "large", "large-v3"])
 AVAILABLE_MODEL_SET = whisper_available_models()
 def safe_model_choices(prefer_default="small"):
     base_choices = ["small", "medium", "large", "large-v3", "base", "tiny"]
     choices = [m for m in base_choices if m in AVAILABLE_MODEL_SET]
@@ -345,7 +370,6 @@ def safe_model_choices(prefer_default="small"):
     default = prefer_default if prefer_default in choices else choices[0]
     return choices, default
 def get_whisper_model(name, device=None):
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
@@ -358,7 +382,6 @@ def get_whisper_model(name, device=None):
             MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
 # ---------- SRT helper ----------
 def segments_to_srt(segments):
     def fmt_time(t):
@@ -379,7 +402,6 @@ def segments_to_srt(segments):
         lines.append("")
     return "\n".join(lines)
 # ---------- ZIP extraction (per-run dir) ----------
 def extract_zip_and_map(zip_path, zip_password=None):
     global EXTRACT_MAP
@@ -439,7 +461,6 @@ def extract_zip_and_map(zip_path, zip_password=None):
             pass
         return [], f"Extraction failed: {e}"
 # ---------- Trim helper used in two-pass ----------
 def trim_audio_segment(src_path, start_sec, end_sec):
     src = str(src_path)
@@ -482,8 +503,7 @@ def trim_audio_segment(src_path, start_sec, end_sec):
             pass
         raise
-# ---------- Core transcription (single file, supports two-pass) ----------
 def transcribe_single_file(
     path,
     model_name="small",
@@ -495,6 +515,7 @@ def transcribe_single_file(
     refine_model=None,
     refine_threshold=-1.0,
 ):
     logs = []
     try:
         if not path:
@@ -533,16 +554,13 @@ def transcribe_single_file(
                     pass
             return text, srt_path, "\n".join(logs)
-        # Two-pass path remains unchanged (not used by generator directly)
-        # ... omitted here for brevity (two-pass logic same as previous full file) ...
-        # For the generator flow we use chunking; two-pass heavy refinement is optional
-        return "", None, "Two-pass is not invoked in this helper in streaming mode."
     except Exception as e:
         tb = traceback.format_exc()
         return "", None, f"Transcription error: {e}\n{tb}"
-# ---------- Batch transcribe (unchanged, uses transcribe_single_file) ----------
 def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
     logs = []
     transcripts = []
@@ -593,7 +611,6 @@ def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name,
     srt_return = srt_files[0] if srt_files else None
     return combined, "\n".join(logs), out_doc, srt_return
 # ---------- Build Gradio UI (3.x compatible) ----------
 print("DEBUG: building Gradio UI", flush=True)
 available_choices, default_choice = safe_model_choices(prefer_default="small")
@@ -631,7 +648,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
     <script>
     (function() {
       try {
-        // Load saved preference or fall back to OS preference, then 'light'
         var saved = null;
         try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
         var chosen = null;
@@ -643,7 +659,6 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
           chosen = 'light';
         }
         document.documentElement.setAttribute('data-theme', chosen);
         try {
           var style = document.createElement('style');
           style.innerHTML = `
@@ -662,7 +677,7 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
             gr.HTML("<div style='width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,#4f46e5,#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px;'>WT</div>")
         with gr.Column():
             gr.Markdown("<h3 style='margin:0'>Whisper Transcriber (Gradio 3.x)</h3>")
-            gr.Markdown("<div class='small-note'>Two-pass speedup, per-run ZIP extraction, memory corrections, SRT export, dark/light toggle</div>")
     with gr.Tabs():
         # Single audio
@@ -675,118 +690,170 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
                     device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
                     mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
                     srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
                     use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
-                    fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model")
                     refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
                     transcribe_btn = gr.Button("Transcribe", variant="primary")
                 with gr.Column(scale=1):
                     gr.Markdown("### Output")
-                    # progress: numeric slider visually works as a progress bar in Gradio 3.x
                     progress_num = gr.Slider(minimum=0, maximum=100, value=0, label="Progress (%)", interactive=False)
                     transcript_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
                     srt_download = gr.File(label="SRT (if generated)")
                     single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
-            # ---------- streaming, chunked single-file transcription ----------
-            def _single_generator(audio_file, model_name, device, mem_on, srt_on, use_two_pass_flag, fast_model, refine_thresh, chunk_size_sec=30, enable_chunking=True):
-                """
-                Generator yields tuples for Gradio outputs: (progress_num, transcript_text, srt_path_or_none, logs)
-                """
                 yield 0, "", None, "Starting..."
                 try:
                     if not audio_file:
                         yield 100, "", None, "No audio provided."
                         return
-                    # resolve input path
                     path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
-                    # Convert file to wav (yield while converting)
                     yield 2, "", None, "Converting input to WAV..."
                     wav = convert_to_wav_if_needed(path)
                     yield 8, "", None, f"Converted to WAV: {os.path.basename(wav)}"
-                    # Determine duration and chunking
-                    if enable_chunking:
                         duration = None
                         try:
-                            p = subprocess.run(["ffprobe","-v","error","-show_entries","format=duration","-of","default=noprint_wrappers=1:nokey=1", wav], capture_output=True, text=True, timeout=8)
-                            duration = float(p.stdout.strip()) if p.stdout and p.stdout.strip() else None
                         except Exception:
                             duration = None
-                        if duration is None:
-                            try:
-                                aud = AudioSegment.from_file(wav)
-                                duration = len(aud) / 1000.0
-                            except Exception:
-                                duration = None
-                        if duration and duration > chunk_size_sec * 1.5:
-                            num_chunks = max(1, int((duration + chunk_size_sec - 1) // chunk_size_sec))
-                            chunk_ranges = []
-                            start = 0.0
-                            for i in range(num_chunks):
-                                end = min(duration, start + chunk_size_sec)
-                                chunk_ranges.append((start, end))
-                                start = end
-                        else:
-                            enable_chunking = False
-                            chunk_ranges = [(0.0, None)]
                     else:
                         chunk_ranges = [(0.0, None)]
-                    # load model (single load)
-                    yield 10, "", None, f"Loading model: {model_name}..."
                     model = get_whisper_model(model_name, device=None if device == "auto" else device)
                     yield 15, "", None, f"Model loaded: {model_name}"
-                    # Prepare transcription loop
-                    overall_text_parts = []
                     total_chunks = len(chunk_ranges)
-                    for idx, (st, ed) in enumerate(chunk_ranges, start=1):
-                        try:
-                            if ed is None:
-                                chunk_wav = wav
-                                note = "full file"
-                            else:
-                                chunk_wav = trim_audio_segment(wav, st, ed)
-                                note = f"{st:.1f}s - {ed:.1f}s"
-                            yield int(15 + (idx - 1) * 70 / max(1, total_chunks)), "", None, f"Transcribing chunk {idx}/{total_chunks} ({note})..."
-                            whisper_opts = {}
-                            # keep whisper_opts minimal to speed transcribe call; model implementation may ignore unknown opts
-                            result = model.transcribe(chunk_wav, **whisper_opts)
-                            chunk_text = result.get("text", "").strip()
-                            if mem_on:
-                                chunk_text = memory_correct_text(chunk_text)
-                            chunk_text = postprocess_transcript(chunk_text)
-                            overall_text_parts.append(chunk_text)
-                            if ed is not None and chunk_wav and os.path.exists(chunk_wav) and chunk_wav != wav:
-                                try:
-                                    os.unlink(chunk_wav)
-                                except Exception:
-                                    pass
-                            partial = "\n\n".join(overall_text_parts)
-                            prog = int(15 + idx * 70 / max(1, total_chunks))
-                            yield prog, partial, None, f"Completed chunk {idx}/{total_chunks}."
-                        except Exception as e:
-                            yield int(15 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_text_parts), None, f"Chunk {idx} failed: {e}\n{traceback.format_exc()}"
                     # final assembly
-                    final_text = "\n\n".join([p for p in overall_text_parts if p])
                     if mem_on:
                         try:
                             update_memory_with_transcript(final_text)
                         except Exception:
                             pass
-                    # generate SRT if requested (best-effort using full model segments)
                     srt_path = None
                     if srt_on:
                         try:
@@ -816,11 +883,11 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
             transcribe_btn.click(
                 fn=_single_generator,
-                inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, use_two_pass_single, fast_model_choice, refine_threshold_single],
                 outputs=[progress_num, transcript_out, srt_download, single_logs],
             )
-        # Batch tab
         with gr.TabItem("Batch Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -877,7 +944,7 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
                 outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
             )
-        # Memory tab
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
@@ -968,14 +1035,14 @@ with gr.Blocks(title="Whisper Transcriber (3.x)", css=CSS) as demo:
             mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
-        # Settings tab (theme toggle via injected HTML)
         with gr.TabItem("Settings"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown("### Runtime & tips")
                     gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
                     gr.Markdown("- Extraction writes to a per-run temp directory under system temp.")
-                    gr.Markdown("- Two-pass helps when heavy model is slow.")
                 with gr.Column():
                     gr.Markdown("### Theme")
                     gr.HTML("""

 # app.py
+# Whisper Transcriber — Gradio 3.x compatible full file
+# Features added: chunk size control, experimental parallel chunk transcription (CPU-only),
+# streaming progress bar (no audio preview), memory corrections, ZIP extraction, theme toggle.
+#
+# Requirements: gradio (3.x), whisper, pydub, pyzipper, python-docx, ffmpeg installed.
+# Experimental parallel mode uses multiprocessing and loads the 'fast' model in each worker.
 import os
 import sys
 from difflib import get_close_matches
 from uuid import uuid4
 from pathlib import Path
+from multiprocessing import get_context
+from typing import Tuple, List
 # Force unbuffered prints for logs
 os.environ["PYTHONUNBUFFERED"] = "1"
 print("DEBUG: app.py bootstrap starting", flush=True)
+# Third-party imports
 try:
     import gradio as gr
     import whisper
 MODEL_CACHE = {}
 EXTRACT_MAP = {}  # friendly_name -> absolute path
+# ---------- Worker-global for multiprocessing ----------
+# These are defined for worker processes (initialized via initializer)
+WORKER_MODEL = None  # type: ignore
+def worker_init(model_name: str, device: str) -> None:
+    """
+    Multiprocessing worker initializer: load a whisper model per worker.
+    Use device='cpu' for workers (recommended).
+
+    Args:
+        model_name: Model name passed to ``whisper.load_model``.
+        device: Device passed to ``whisper.load_model``. An empty value or
+            "auto" means the call is made without a device argument.
+
+    The loaded model is stored in the module-level ``WORKER_MODEL``. If the
+    first load attempt raises, one more load is attempted without a device
+    argument; an exception from that second attempt propagates.
+    """
+    global WORKER_MODEL
+    try:
+        if device and device != "auto":
+            WORKER_MODEL = whisper.load_model(model_name, device=device)
+        else:
+            WORKER_MODEL = whisper.load_model(model_name)
+    except Exception:
+        # fallback: try load without device arg
+        # NOTE(review): when no device was passed above, this repeats the same
+        # call. When device='cpu' was requested, the retry drops that
+        # constraint - confirm this is acceptable for the CPU-only mode.
+        WORKER_MODEL = whisper.load_model(model_name)
+def worker_transcribe_chunk(chunk_path: str) -> Tuple[str, str]:
+    """
+    Worker function to transcribe a chunk using WORKER_MODEL.
+    Returns (text, error_message). error_message empty if OK.
+
+    Args:
+        chunk_path: Path of the audio chunk passed to ``WORKER_MODEL.transcribe``.
+
+    Exceptions are not propagated: any failure is reported through the
+    second tuple element, together with the formatted traceback.
+    """
+    global WORKER_MODEL
+    try:
+        # The model is set by worker_init; None means it has not been loaded.
+        if WORKER_MODEL is None:
+            return "", "Worker model not loaded"
+        res = WORKER_MODEL.transcribe(chunk_path)
+        # A missing "text" key is treated as an empty transcript.
+        text = res.get("text", "").strip()
+        return text, ""
+    except Exception as e:
+        return "", f"Worker transcription error: {e}\n{traceback.format_exc()}"
 # ---------- Memory & postprocessing ----------
 def load_memory():
     try:
         pass
     return mem
 def save_memory(mem):
     with MEMORY_LOCK:
         try:
         except Exception:
             traceback.print_exc()
 memory = load_memory()
 MEDICAL_ABBREVIATIONS = {
     "amoxicillin": "Amoxicillin",
 }
 def expand_abbreviations(text):
     tokens = re.split(r"(\s+)", text)
     out = []
             out.append(t)
     return "".join(out)
 def normalize_drugs(text):
     """Rewrite drug names found in *text* to their canonical spelling.

     Each key of ``DRUG_NORMALIZATION`` is matched as a whole word,
     case-insensitively, and replaced by the corresponding value.

     Args:
         text: The transcript text to normalize.

     Returns:
         The text with every matched drug name replaced.
     """
     for k, v in DRUG_NORMALIZATION.items():
         # Escape the key so regex metacharacters in a drug name (".", "+",
         # "(") are matched literally instead of being interpreted as a
         # pattern. A callable replacement keeps backslashes or group
         # references in the canonical name from being parsed as a template.
         pattern = rf"\b{re.escape(k)}\b"
         text = re.sub(pattern, lambda _m, canon=v: canon, text, flags=re.IGNORECASE)
     return text
 def punctuation_and_capitalization(text):
     text = text.strip()
     if not text:
             out.append(p)
     return "".join(out)
 def postprocess_transcript(text):
     if not text:
         return text
     t = punctuation_and_capitalization(t)
     return t
 def extract_words_and_phrases(text):
     """Pull word tokens and sentence strings out of *text*.

     Words are runs of ASCII letters, digits, hyphens and apostrophes.
     Sentences are produced by splitting on whitespace that follows ".",
     "?" or "!"; each one is stripped and empty results are discarded.

     Returns:
         A ``(words, sentences)`` pair of lists.
     """
     word_tokens = list(filter(str.strip, re.findall(r"[A-Za-z0-9\-']+", text)))
     raw_pieces = re.split(r"(?<=[.?!])\s+", text)
     sentence_list = [piece for piece in map(str.strip, raw_pieces) if piece]
     return word_tokens, sentence_list
 def update_memory_with_transcript(transcript):
     global memory
     words, sentences = extract_words_and_phrases(transcript)
         if changed:
             save_memory(memory)
 def memory_correct_text(text, min_ratio=0.85):
     if not text or (not memory.get("words") and not memory.get("phrases")):
         return text
             corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
     return corrected
 # ---------- Utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
     doc.save(filename)
     return filename
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
             pass
         return False, str(e)
 def convert_to_wav_if_needed(input_path):
     input_path = str(input_path)
     lower = input_path.lower()
     raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
 # ---------- Whisper helper ----------
 def whisper_available_models():
     try:
         pass
     return set(["tiny", "base", "small", "medium", "large", "large-v3"])
 AVAILABLE_MODEL_SET = whisper_available_models()
 def safe_model_choices(prefer_default="small"):
     base_choices = ["small", "medium", "large", "large-v3", "base", "tiny"]
     choices = [m for m in base_choices if m in AVAILABLE_MODEL_SET]
     default = prefer_default if prefer_default in choices else choices[0]
     return choices, default
 def get_whisper_model(name, device=None):
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
             MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
 # ---------- SRT helper ----------
 def segments_to_srt(segments):
     def fmt_time(t):
         lines.append("")
     return "\n".join(lines)
 # ---------- ZIP extraction (per-run dir) ----------
 def extract_zip_and_map(zip_path, zip_password=None):
     global EXTRACT_MAP
             pass
         return [], f"Extraction failed: {e}"
 # ---------- Trim helper used in two-pass ----------
 def trim_audio_segment(src_path, start_sec, end_sec):
     src = str(src_path)
             pass
         raise
+# ---------- Core transcription (single file) ----------
 def transcribe_single_file(
     path,
     model_name="small",
     refine_model=None,
     refine_threshold=-1.0,
 ):
+    # non-streaming convenience helper used for batch mode
     logs = []
     try:
         if not path:
                     pass
             return text, srt_path, "\n".join(logs)
+        # Two-pass path not used for streaming generator here
+        return "", None, "Two-pass not used in this helper."
     except Exception as e:
         tb = traceback.format_exc()
         return "", None, f"Transcription error: {e}\n{tb}"
+# ---------- Batch transcribe (unchanged) ----------
 def batch_transcribe(friendly_selected, uploaded_files, model_name, device_name, merge_flag, enable_mem, generate_srt, use_two_pass=False, fast_model="small", refine_threshold=-1.0):
     logs = []
     transcripts = []
     srt_return = srt_files[0] if srt_files else None
     return combined, "\n".join(logs), out_doc, srt_return
 # ---------- Build Gradio UI (3.x compatible) ----------
 print("DEBUG: building Gradio UI", flush=True)
 available_choices, default_choice = safe_model_choices(prefer_default="small")
     <script>
     (function() {
       try {
         var saved = null;
         try { saved = localStorage.getItem('wt_theme'); } catch(e){ saved = null; }
         var chosen = null;
           chosen = 'light';
         }
         document.documentElement.setAttribute('data-theme', chosen);
         try {
           var style = document.createElement('style');
           style.innerHTML = `
             gr.HTML("<div style='width:50px;height:50px;border-radius:10px;background:linear-gradient(135deg,#4f46e5,#06b6d4);display:flex;align-items:center;justify-content:center;color:white;font-weight:700;font-size:20px;'>WT</div>")
         with gr.Column():
             gr.Markdown("<h3 style='margin:0'>Whisper Transcriber (Gradio 3.x)</h3>")
+            gr.Markdown("<div class='small-note'>Chunked streaming, experimental CPU parallel, per-run ZIP extraction, memory corrections, SRT export, dark/light toggle</div>")
     with gr.Tabs():
         # Single audio
                     device_choice = gr.Dropdown(choices=["auto", "cpu", "cuda"], value="auto", label="Device")
                     mem_toggle = gr.Checkbox(label="Enable memory corrections", value=False)
                     srt_toggle = gr.Checkbox(label="Generate SRT", value=False)
+                    # chunk controls
+                    chunk_controls_row = gr.Row(visible=True)
+                    chunk_size_input = gr.Number(value=30, label="Chunk size (seconds)", precision=0)
+                    enable_chunking = gr.Checkbox(label="Enable chunking (recommended for long files)", value=True)
+                    # parallel experimental
+                    parallel_checkbox = gr.Checkbox(label="Enable experimental parallel chunk transcription (CPU only)", value=False)
+                    parallel_workers = gr.Slider(minimum=1, maximum=max(1, os.cpu_count() or 4), value=2, step=1, label="Parallel workers (processes)")
                     use_two_pass_single = gr.Checkbox(label="Use two-pass speedup (fast then refine)", value=False)
+                    fast_model_choice = gr.Dropdown(choices=[c for c in ["tiny", "base", "small"] if c in AVAILABLE_MODEL_SET], value="small", label="Fast model (for two-pass / workers)")
                     refine_threshold_single = gr.Number(value=-1.0, label="Refine threshold (avg_logprob)", precision=2)
                     transcribe_btn = gr.Button("Transcribe", variant="primary")
                 with gr.Column(scale=1):
                     gr.Markdown("### Output")
                     progress_num = gr.Slider(minimum=0, maximum=100, value=0, label="Progress (%)", interactive=False)
                     transcript_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
                     srt_download = gr.File(label="SRT (if generated)")
                     single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
+            # streaming generator with optional multiprocessing
+            def _single_generator(audio_file, model_name, device, mem_on, srt_on, chunk_size_sec, chunking_enabled, parallel_enabled, workers, use_two_pass_flag, fast_model, refine_thresh):
                 yield 0, "", None, "Starting..."
                 try:
                     if not audio_file:
                         yield 100, "", None, "No audio provided."
                         return
                     path = audio_file if isinstance(audio_file, str) else (audio_file.name if hasattr(audio_file, "name") else str(audio_file))
                     yield 2, "", None, "Converting input to WAV..."
                     wav = convert_to_wav_if_needed(path)
                     yield 8, "", None, f"Converted to WAV: {os.path.basename(wav)}"
+                    # determine duration
+                    duration = None
+                    try:
+                        p = subprocess.run(["ffprobe","-v","error","-show_entries","format=duration","-of","default=noprint_wrappers=1:nokey=1", wav], capture_output=True, text=True, timeout=8)
+                        duration = float(p.stdout.strip()) if p.stdout and p.stdout.strip() else None
+                    except Exception:
                         duration = None
+                    if duration is None:
                         try:
+                            aud = AudioSegment.from_file(wav)
+                            duration = len(aud) / 1000.0
                         except Exception:
                             duration = None
+                    # build chunk ranges
+                    if chunking_enabled and (duration and duration > chunk_size_sec * 1.5):
+                        num_chunks = max(1, int((duration + chunk_size_sec - 1) // chunk_size_sec))
+                        chunk_ranges = []
+                        start = 0.0
+                        for i in range(num_chunks):
+                            end = min(duration, start + chunk_size_sec)
+                            chunk_ranges.append((start, end))
+                            start = end
                     else:
                         chunk_ranges = [(0.0, None)]
+                        chunking_enabled = False
+                    yield 10, "", None, f"Preparing transcription ({len(chunk_ranges)} chunk(s))..."
+                    # Load model in main process (for serial or orchestration)
                     model = get_whisper_model(model_name, device=None if device == "auto" else device)
                     yield 15, "", None, f"Model loaded: {model_name}"
+                    overall_parts = []
                     total_chunks = len(chunk_ranges)
+                    # Decide whether we can/should run parallel workers
+                    parallel_used = False
+                    if parallel_enabled and chunking_enabled and total_chunks > 1:
+                        if device != "cpu" and device != "auto":
+                            # Most likely GPU requested; parallel across multiple processes with GPU not recommended
+                            yield 15, "", None, "Parallel mode requested but device is not 'cpu'. Falling back to serial chunking."
+                            parallel_used = False
+                        else:
+                            # attempt to spawn a multiprocessing pool that initializes each worker with fast_model on CPU
+                            try:
+                                ctx = get_context("spawn")
+                                worker_count = max(1, int(workers))
+                                yield 18, "", None, f"Starting parallel pool with {worker_count} workers (fast_model={fast_model})..."
+                                pool = ctx.Pool(processes=worker_count, initializer=worker_init, initargs=(fast_model, "cpu"))
+                                # prepare chunk WAVs
+                                chunk_paths = []
+                                temp_chunk_files = []
+                                for (st, ed) in chunk_ranges:
+                                    if ed is None:
+                                        chunk_paths.append(wav)
+                                    else:
+                                        cw = trim_audio_segment(wav, st, ed)
+                                        chunk_paths.append(cw)
+                                        temp_chunk_files.append(cw)
+                                # map transcribe jobs
+                                results = pool.map(worker_transcribe_chunk, chunk_paths)
+                                pool.close()
+                                pool.join()
+                                # process results in order
+                                for idx, (txt, err) in enumerate(results, start=1):
+                                    if err:
+                                        yield int(20 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} worker error: {err}"
+                                    else:
+                                        if mem_on:
+                                            txt = memory_correct_text(txt)
+                                        txt = postprocess_transcript(txt)
+                                        overall_parts.append(txt)
+                                        prog = int(20 + idx * 70 / max(1, total_chunks))
+                                        yield prog, "\n\n".join(overall_parts), None, f"Completed chunk {idx}/{total_chunks} (parallel)."
+                                # clean up the temporary chunk files (the original wav is left in place)
+                                for tfile in temp_chunk_files:
+                                    try:
+                                        if os.path.exists(tfile):
+                                            os.unlink(tfile)
+                                    except Exception:
+                                        pass
+                                parallel_used = True
+                            except Exception as e:
+                                yield 20, "", None, f"Parallel execution failed, falling back to serial: {e}\n{traceback.format_exc()}"
+                                parallel_used = False
+                    if not parallel_used:
+                        # serial chunk processing (runs whenever the parallel path was not used or failed)
+                        for idx, (st, ed) in enumerate(chunk_ranges, start=1):
+                            try:
+                                if ed is None:
+                                    chunk_wav = wav
+                                    note = "full file"
+                                else:
+                                    chunk_wav = trim_audio_segment(wav, st, ed)
+                                    note = f"{st:.1f}s - {ed:.1f}s"
+                                yield int(15 + (idx - 1) * 70 / max(1, total_chunks)), "", None, f"Transcribing chunk {idx}/{total_chunks} ({note})..."
+                                # call model.transcribe on chunk
+                                whisper_opts = {}
+                                result = model.transcribe(chunk_wav, **whisper_opts)
+                                chunk_text = result.get("text", "").strip()
+                                if mem_on:
+                                    chunk_text = memory_correct_text(chunk_text)
+                                chunk_text = postprocess_transcript(chunk_text)
+                                overall_parts.append(chunk_text)
+                                if ed is not None and chunk_wav and os.path.exists(chunk_wav) and chunk_wav != wav:
+                                    try:
+                                        os.unlink(chunk_wav)
+                                    except Exception:
+                                        pass
+                                partial = "\n\n".join(overall_parts)
+                                prog = int(15 + idx * 70 / max(1, total_chunks))
+                                yield prog, partial, None, f"Completed chunk {idx}/{total_chunks}."
+                            except Exception as e:
+                                yield int(15 + idx * 70 / max(1, total_chunks)), "\n\n".join(overall_parts), None, f"Chunk {idx} failed: {e}\n{traceback.format_exc()}"
                     # final assembly
+                    final_text = "\n\n".join([p for p in overall_parts if p])
                     if mem_on:
                         try:
                             update_memory_with_transcript(final_text)
                         except Exception:
                             pass
+                    # SRT generation best-effort (runs a full transcribe to get segments)
                     srt_path = None
                     if srt_on:
                         try:
             transcribe_btn.click(
                 fn=_single_generator,
+                inputs=[single_audio, model_select, device_choice, mem_toggle, srt_toggle, chunk_size_input, enable_chunking, parallel_checkbox, parallel_workers, use_two_pass_single, fast_model_choice, refine_threshold_single],
                 outputs=[progress_num, transcript_out, srt_download, single_logs],
             )
+        # Batch tab (unchanged UI and behavior)
         with gr.TabItem("Batch Transcribe"):
             with gr.Row():
                 with gr.Column(scale=1):
                 outputs=[batch_trans_out, batch_logs, batch_doc_download, batch_srt_download],
             )
+        # Memory tab (unchanged)
         with gr.TabItem("Memory"):
             with gr.Row():
                 with gr.Column(scale=1):
             mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
             mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])
+        # Settings tab (theme)
         with gr.TabItem("Settings"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown("### Runtime & tips")
                     gr.Markdown("- Use `large-v3` only if your whisper package supports it.")
                     gr.Markdown("- Extraction writes to a per-run temp directory under system temp.")
+                    gr.Markdown("- Two-pass helps when heavy model is slow; experimental parallel helps primarily for CPU workloads with many cores.")
                 with gr.Column():
                     gr.Markdown("### Theme")
                     gr.HTML("""