staraks committed
Commit 8174d48 · verified · 1 Parent(s): 77f67b0

Update app.py

Files changed (1)
  1. app.py +18 -124
app.py CHANGED
@@ -1,20 +1,6 @@
  # app.py
- # Whisper Transcriber — improved with:
- # - Memory ZIP preview + selective import
- # - faster-whisper auto-detect + multiprocessing batch transcription
- # - progress streaming generator + per-file transcript ZIP
- #
- # Requirements:
- # - gradio (3.x)
- # - python-docx
- # - pydub
- # - pyzipper
- # - ffmpeg installed on system
- # - whisper OR faster-whisper (faster-whisper recommended for CPU speed)
- #
- # Notes:
- # - Multiprocessing may load model per worker (memory heavy). Tune MAX_WORKERS.
- # - This file is designed to be dropped into your existing project and run.
+ # Whisper Transcriber — Parallel + Memory preview (fixed)
+ # (Same requirements as before: gradio 3.x, pydub, pyzipper, python-docx, ffmpeg, whisper or faster-whisper)

  import os
  import sys
@@ -33,36 +19,30 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
  import multiprocessing
  import time

- # Gradio & model libs
  try:
      import gradio as gr
  except Exception as e:
      print("FATAL: gradio import failed:", e)
      raise

- # Try to import faster_whisper first, fallback to openai/whisper
  USE_FASTER_WHISPER = False
  try:
      from faster_whisper import WhisperModel as FasterWhisperModel
-
      USE_FASTER_WHISPER = True
      print("INFO: faster-whisper available — will use it for faster CPU inference.")
  except Exception:
      try:
-         import whisper # fallback
+         import whisper
      except Exception:
          print("FATAL: Neither faster-whisper nor whisper available.")
          raise

- # Audio processing
  from pydub import AudioSegment
  import pyzipper
  from docx import Document

- # Force unbuffered prints for logs
  os.environ["PYTHONUNBUFFERED"] = "1"

- # ---------- Config ----------
  MEMORY_FILE = "memory.json"
  MEMORY_LOCK = threading.Lock()
  MIN_WAV_SIZE = 1024
@@ -73,16 +53,13 @@ FFMPEG_CANDIDATES = [
      ("pcm_s16le", 44100, 2),
      ("mulaw", 8000, 1),
  ]
- MODEL_CACHE = {} # name -> model instance (only for main process, workers load models separately)
- EXTRACT_MAP = {} # friendly_name -> absolute path (per-run)
+ MODEL_CACHE = {}
+ EXTRACT_MAP = {}
  DEFAULT_ZIP_PASS = "dietcoke1"

- # Multiprocessing tuning: set a sensible default
  CPU_COUNT = max(1, multiprocessing.cpu_count())
- MAX_WORKERS = min(4, CPU_COUNT) # adjust as needed; each worker loads a model (memory heavy)
+ MAX_WORKERS = min(4, CPU_COUNT)
-

- # ---------- Memory helpers ----------
  def load_memory():
      try:
          if os.path.exists(MEMORY_FILE):
@@ -103,7 +80,6 @@ def load_memory():
          pass
      return mem

-
  def save_memory(mem):
      with MEMORY_LOCK:
          try:
@@ -112,11 +88,8 @@ def save_memory(mem):
          except Exception:
              traceback.print_exc()

-
  memory = load_memory()

-
- # ---------- Postprocessing ----------
  MEDICAL_ABBREVIATIONS = {
      "pt": "patient",
      "dx": "diagnosis",
@@ -136,7 +109,6 @@ DRUG_NORMALIZATION = {
      "amoxicillin": "Amoxicillin",
  }

-
  def expand_abbreviations(text):
      tokens = re.split(r"(\s+)", text)
      out = []
@@ -152,13 +124,11 @@ def expand_abbreviations(text):
          out.append(t)
      return "".join(out)

-
  def normalize_drugs(text):
      for k, v in DRUG_NORMALIZATION.items():
          text = re.sub(rf"\b{k}\b", v, text, flags=re.IGNORECASE)
      return text

-
  def punctuation_and_capitalization(text):
      text = text.strip()
      if not text:
@@ -174,7 +144,6 @@ def punctuation_and_capitalization(text):
          out.append(p)
      return "".join(out)

-
  def postprocess_transcript(text):
      if not text:
          return text
@@ -184,13 +153,11 @@ def postprocess_transcript(text):
      t = punctuation_and_capitalization(t)
      return t

-
  def extract_words_and_phrases(text):
      words = re.findall(r"[A-Za-z0-9\-']+", text)
      sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
      return [w for w in words if w.strip()], sentences

-
  def update_memory_with_transcript(transcript):
      global memory
      words, sentences = extract_words_and_phrases(transcript)
@@ -206,7 +173,6 @@ def update_memory_with_transcript(transcript):
      if changed:
          save_memory(memory)

-
  def memory_correct_text(text, min_ratio=0.85):
      if not text or (not memory.get("words") and not memory.get("phrases")):
          return text
@@ -240,8 +206,6 @@ def memory_correct_text(text, min_ratio=0.85):
          corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
      return corrected

-
- # ---------- File & conversion utilities ----------
  def save_as_word(text, filename=None):
      if filename is None:
          filename = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
@@ -250,7 +214,6 @@ def save_as_word(text, filename=None):
      doc.save(filename)
      return filename

-
  def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
      try:
          cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
@@ -277,7 +240,6 @@ def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
              pass
          return False, str(e)

-
  def convert_to_wav_if_needed(input_path):
      input_path = str(input_path)
      lower = input_path.lower()
@@ -305,7 +267,6 @@ def convert_to_wav_if_needed(input_path):
      except Exception:
          pass

-     # ffmpeg fallback
      diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
      diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
      diagnostics = []
@@ -361,12 +322,9 @@ def convert_to_wav_if_needed(input_path):

      raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")

-
- # ---------- Model utils (main process; workers load locally inside worker fn) ----------
  def whisper_available_models():
      try:
          if USE_FASTER_WHISPER:
-             # faster-whisper doesn't provide available_models; trust common names
              return set(["tiny", "base", "small", "medium", "large", "large-v3"])
          else:
              models = whisper.available_models()
@@ -376,10 +334,8 @@ def whisper_available_models():
          pass
      return set(["tiny", "base", "small", "medium", "large", "large-v3"])

-
  AVAILABLE_MODEL_SET = whisper_available_models()

-
  def safe_model_choices(prefer_default="small"):
      base_choices = ["small", "medium", "large", "large-v3", "base", "tiny"]
      choices = [m for m in base_choices if m in AVAILABLE_MODEL_SET]
@@ -388,27 +344,17 @@ def safe_model_choices(prefer_default="small"):
      default = prefer_default if prefer_default in choices else choices[0]
      return choices, default

-
- # Worker transcribe function (runs in worker process)
  def _worker_transcribe(args):
-     """
-     This function is invoked inside a worker process.
-     args: (file_path, model_name, device_name, enable_memory, generate_srt, use_two_pass, fast_model, refine_threshold)
-     Returns: dict{ 'file': basename, 'text_path': path, 'srt_path': path or None, 'log': str }
-     """
      try:
          (file_path, model_name, device_name, enable_memory, generate_srt, use_two_pass, fast_model, refine_threshold) = args
          base = os.path.basename(file_path)
          log_lines = []
          device = None if device_name == "auto" else device_name

-         # Load model inside worker (no sharing)
          model = None
          use_fw = False
          try:
              if USE_FASTER_WHISPER:
-                 # faster-whisper uses WhisperModel(model_size_or_path, device=..., compute_type=...)
-                 # Use default compute_type; user can customize code if desired
                  model = FasterWhisperModel(model_name, device=device if device else "cpu")
                  use_fw = True
                  log_lines.append(f"Worker: faster-whisper loaded {model_name}")
@@ -419,7 +365,6 @@ def _worker_transcribe(args):
                  log_lines.append(f"Worker: whisper loaded {model_name}")
          except Exception as e:
              log_lines.append(f"Worker model load failed: {e}")
-             # attempt fallback to small
              try:
                  if USE_FASTER_WHISPER:
                      model = FasterWhisperModel("small", device=device if device else "cpu")
@@ -432,24 +377,19 @@ def _worker_transcribe(args):
              except Exception as e2:
                  return {"file": base, "text_path": None, "srt_path": None, "log": "Model load failed: " + str(e2)}

-         # Convert to WAV
          try:
              wav = convert_to_wav_if_needed(file_path)
              log_lines.append(f"Converted to WAV: {os.path.basename(wav)}")
          except Exception as e:
              return {"file": base, "text_path": None, "srt_path": None, "log": "Conversion failed: " + str(e)}

-         # Transcribe — two modes: faster-whisper usage differs
          try:
              if use_fw:
-                 # faster-whisper returns (segments, info) via transcribe(..., beam_size=..., vad_filter=False)
                  segments, info = model.transcribe(wav, beam_size=5)
                  text = "".join([seg.text for seg in segments]).strip()
-                 # segments: objects with start/end/text
                  if generate_srt:
                      srt_text = []
                      for i, seg in enumerate(segments, start=1):
-                         # seg.start, seg.end, seg.text
                          start = getattr(seg, "start", 0)
                          end = getattr(seg, "end", 0)
                          txt = getattr(seg, "text", "").strip()
@@ -467,14 +407,11 @@ def _worker_transcribe(args):
          except Exception as e:
              return {"file": base, "text_path": None, "srt_path": None, "log": "Transcription failed: " + str(e)}

-         # Apply memory correction if requested
          if enable_memory and text:
              text = memory_correct_text(text)

-         # Postprocess
          text = postprocess_transcript(text)

-         # Write text and srt to temp files
          txt_tmp = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
          txt_tmp.close()
          with open(txt_tmp.name, "w", encoding="utf-8") as fh:
@@ -488,7 +425,6 @@ def _worker_transcribe(args):
                  fh.write(srt_out)
              srt_path = srt_tmp.name

-         # Clean up WAV if created
          try:
              if wav and os.path.exists(wav) and not file_path.lower().endswith(".wav"):
                  os.unlink(wav)
@@ -500,8 +436,6 @@ def _worker_transcribe(args):
          tb = traceback.format_exc()
          return {"file": os.path.basename(file_path) if file_path else "unknown", "text_path": None, "srt_path": None, "log": f"Worker exception: {e}\n{tb}"}

-
- # small helpers used by worker
  def _fmt_time(t):
      h = int(t // 3600)
      m = int((t % 3600) // 60)
@@ -509,7 +443,6 @@ def _fmt_time(t):
      ms = int((t - int(t)) * 1000)
      return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

-
  def _segments_to_srt(segments):
      lines = []
      for i, seg in enumerate(segments, start=1):
@@ -522,8 +455,6 @@ def _segments_to_srt(segments):
          lines.append("")
      return "\n".join(lines)

-
- # ---------- ZIP extraction (friendly mapping) ----------
  def extract_zip_and_map(zip_path, zip_password=None):
      global EXTRACT_MAP
      EXTRACT_MAP = {}
@@ -582,8 +513,6 @@ def extract_zip_and_map(zip_path, zip_password=None):
              pass
          return [], f"Extraction failed: {e}"

-
- # ---------- Batch transcription with parallel workers & streaming progress ----------
  def batch_transcribe_parallel_generator(
      friendly_selected,
      uploaded_files,
@@ -597,15 +526,10 @@ def batch_transcribe_parallel_generator(
      refine_threshold=-1.0,
      zip_password=None,
  ):
-     """
-     Generator that yields (logs_text, combined_text, zip_path_or_None, percent_int)
-     It runs multiple workers in parallel and yields progress updates as files complete.
-     """
      logs = []
      transcripts = []
-     per_file_paths = [] # list of (basename, text_tmp, srt_tmp)
+     per_file_paths = []
      try:
-         # Build paths list
          paths = []
          if friendly_selected:
              for key in friendly_selected:
@@ -629,20 +553,16 @@ def batch_transcribe_parallel_generator(
          logs.append(f"Starting batch of {total} files with up to {MAX_WORKERS} workers.")
          yield "\n\n".join(logs), "", None, 2

-         # Prepare task args
          tasks = []
          for p in paths:
              tasks.append((p, model_name, device_name, enable_mem, generate_srt, use_two_pass, fast_model, refine_threshold))

-         # Run in ProcessPoolExecutor
          completed = 0
-         results = []
          with ProcessPoolExecutor(max_workers=min(MAX_WORKERS, total)) as exe:
              futs = {exe.submit(_worker_transcribe, t): t for t in tasks}
              for fut in as_completed(futs):
                  res = fut.result()
                  completed += 1
-                 # res has keys: file, text_path, srt_path, log
                  fname = res.get("file")
                  res_log = res.get("log", "")
                  logs.append(f"[{completed}/{total}] {fname}: {res_log}")
@@ -657,11 +577,9 @@ def batch_transcribe_parallel_generator(
                          txt_content = fh.read()
                      transcripts.append(f"FILE: {fname}\n{txt_content}\n")
                      per_file_paths.append((fname, txtp, srtp))
-                 # progress update
                  pct = int(5 + (completed / total) * 90)
                  yield "\n\n".join(logs), "\n\n".join(transcripts), None, pct

-         # combine and optionally merge into DOCX and per-file zip
          combined = "\n\n".join(transcripts)
          out_doc = None
          if merge_flag:
@@ -671,7 +589,6 @@ def batch_transcribe_parallel_generator(
              except Exception as e:
                  logs.append(f"Merge failed: {e}")

-         # Create ZIP with per-file transcripts (txt + srt if available)
          if per_file_paths:
              zip_tmp = tempfile.NamedTemporaryFile(suffix=".zip", delete=False)
              zip_tmp.close()
@@ -681,7 +598,6 @@ def batch_transcribe_parallel_generator(
                      try:
                          zf.write(txtp, arcname=arc_txt)
                      except Exception:
-                         # fallback: write name-safe
                          zf.write(txtp, arcname=os.path.basename(txtp))
                      if srtp and os.path.exists(srtp):
                          arc_srt = f"{fname}.srt"
@@ -698,9 +614,6 @@ def batch_transcribe_parallel_generator(
          logs.append(f"Batch error: {e}\n{tb}")
          yield "\n\n".join(logs), "\n\n".join(transcripts), None, 100

-
- # ---------- Robust multi-file memory importer with preview ----------
-
  def _read_file_text_try_encodings(path):
      encodings = ["utf-8", "utf-16", "latin-1"]
      for enc in encodings:
@@ -723,7 +636,6 @@ def _read_file_text_try_encodings(path):
      except Exception:
          return None, None

-
  def _process_single_memory_text(text):
      added = 0
      try:
@@ -770,9 +682,7 @@ def _process_single_memory_text(text):
              added += 1
      return added

-
  def preview_zip_members_for_memory(zip_path):
-     """Return list of text-like members and a log string"""
      members = []
      logs = []
      try:
@@ -782,11 +692,9 @@ def preview_zip_members_for_memory(zip_path):
                  continue
              name = info.filename
              _, ext = os.path.splitext(name)
-             # consider likely text files
              if ext.lower() in [".txt", ".json", ".csv", ".list", ".md"]:
                  members.append(name)
              else:
-                 # also include others but mark as maybe-binary
                  members.append(name)
          if not members:
              logs.append("No members found in ZIP.")
@@ -796,15 +704,10 @@ def preview_zip_members_for_memory(zip_path):
          logs.append(f"ZIP preview failed: {e}")
      return members, "\n".join(logs)

-
  def import_memory_files_multiple(uploaded_files, zip_members_to_import=None):
-     """
-     Accept list of file paths (or single), or ZIP + selected ZIP members list.
-     """
      if not uploaded_files:
          return "No files provided."

-     # normalize uploaded_files
      if isinstance(uploaded_files, (str, os.PathLike)):
          uploaded_files = [str(uploaded_files)]
      elif isinstance(uploaded_files, dict) and uploaded_files.get("name"):
@@ -830,7 +733,6 @@ def import_memory_files_multiple(uploaded_files, zip_members_to_import=None):
              messages.append(f"Missing: {fp}")
              continue
          if fp.lower().endswith(".zip"):
-             # if zip_members_to_import is provided, only import those
              try:
                  with zipfile.ZipFile(fp, "r") as zf:
                      for info in zf.infolist():
@@ -860,7 +762,6 @@ def import_memory_files_multiple(uploaded_files, zip_members_to_import=None):
              except zipfile.BadZipFile:
                  skipped.append(f"Bad zip: {fp}")
                  continue
-         # otherwise plain file
          text, used_enc = _read_file_text_try_encodings(fp)
          if text is None:
              skipped.append(fp)
@@ -881,8 +782,6 @@ def import_memory_files_multiple(uploaded_files, zip_members_to_import=None):
      summary.extend(skipped)
      return "\n".join(summary)

-
- # ---------- Build Gradio UI ----------
  print("DEBUG: building Gradio UI", flush=True)
  available_choices, default_choice = safe_model_choices(prefer_default="small")

@@ -914,7 +813,6 @@ body { background: var(--bg); color: var(--text); font-family: Inter, system-ui,
  """

  with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CSS) as demo:
-     # Theme init: dark by default
      gr.HTML("""
      <script>
      (function() {
@@ -934,7 +832,6 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS
      gr.Markdown("<div class='small-note'>Preview ZIP members for memory import, parallel batch transcription, faster-whisper auto-detect, per-file transcript downloads</div>")

      with gr.Tabs():
-         # Single tab (keeps simple)
          with gr.TabItem("Single File"):
              with gr.Row():
                  with gr.Column(scale=1):
@@ -947,21 +844,25 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS
                  with gr.Column(scale=1):
                      single_trans_out = gr.Textbox(label="Transcript", lines=14, interactive=False)
                      single_logs = gr.Textbox(label="Logs", lines=8, interactive=False)
+
              def _do_single(audio, model_name, device_name, mem_on, srt_on):
                  if not audio:
                      return "", "No audio supplied."
                  path = audio if isinstance(audio, str) else (audio.name if hasattr(audio, "name") else str(audio))
-                 txt, srtp, lg = _worker_transcribe((path, model_name, device_name, mem_on, srt_on, False, "small", -1.0))
-                 # read back text file if present
-                 if txt.get("text_path"):
-                     with open(txt["text_path"], "r", encoding="utf-8", errors="replace") as fh:
-                         content = fh.read()
+                 res = _worker_transcribe((path, model_name, device_name, mem_on, srt_on, False, "small", -1.0))
+                 if res.get("text_path"):
+                     try:
+                         with open(res["text_path"], "r", encoding="utf-8", errors="replace") as fh:
+                             content = fh.read()
+                     except Exception:
+                         content = ""
                  else:
                      content = ""
-                 return content, txt.get("log", lg)
+                 logs = res.get("log", "")
+                 return content, logs
+
              trans_single_btn.click(fn=_do_single, inputs=[single_audio, model_sel_single, device_single, mem_single, srt_single], outputs=[single_trans_out, single_logs])

-         # Batch tab
          with gr.TabItem("Batch Transcribe"):
              with gr.Row():
                  with gr.Column(scale=1):
@@ -998,9 +899,7 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS

              batch_preview_btn.click(fn=_preview_zip, inputs=[batch_zip, batch_zip_pass], outputs=[batch_preview_out])

-             # Bind the generator for parallel batch with progress slider
              def _start_batch(friendly_selected, uploaded_files, zip_file, zip_pass, model_name, device_name, merge_flag, mem_flag, srt_flag, use_two_pass, fast_model, refine_thresh):
-                 # Normalize uploaded_files into list of paths (gradio provides list of dicts or strings)
                  up = uploaded_files
                  if isinstance(up, dict) and up.get("name"):
                      up = [up["name"]]
@@ -1022,10 +921,8 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS
                  fn=_start_batch,
                  inputs=[batch_select, batch_files, batch_zip, batch_zip_pass, batch_model, batch_device, batch_merge, batch_mem, batch_srt, batch_use_two_pass, batch_fast_model, batch_refine_thresh],
                  outputs=[batch_logs_out, batch_combined_out, batch_zip_download, batch_progress],
-                 _js=None, # compatibility
              )

-         # Memory tab with preview-import flow
          with gr.TabItem("Memory"):
              with gr.Row():
                  with gr.Column(scale=1):
@@ -1049,7 +946,6 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS
              def _preview_many_zip(uploaded):
                  if not uploaded:
                      return "No files."
-                 # find first zip among uploaded and preview it
                  if isinstance(uploaded, dict) and uploaded.get("name"):
                      uploaded = [uploaded["name"]]
                  members_total = []
@@ -1110,14 +1006,12 @@ with gr.Blocks(title="Whisper Transcriber — Parallel + Memory preview", css=CS
              mem_clear_btn.click(fn=_clear_mem, inputs=[], outputs=[mem_status])
              mem_view_btn.click(fn=_view_mem, inputs=[], outputs=[mem_status])

-         # Settings
          with gr.TabItem("Settings"):
              gr.Markdown("### Settings & tips")
              gr.Markdown(f"- Faster-whisper auto-detected: {USE_FASTER_WHISPER}")
              gr.Markdown(f"- Max workers for parallel transcribe: {MAX_WORKERS}")
              gr.Markdown("- If memory or RAM is limited, set MAX_WORKERS lower in code.")

- # ---------- Launch ----------
  if __name__ == "__main__":
      port = int(os.environ.get("PORT", 7860))
      print("DEBUG: launching on port", port)
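
For context on the main fix: _worker_transcribe returns a result dict with the keys "file", "text_path", "srt_path", and "log", but the old _do_single handler unpacked that dict as a three-tuple and read the transcript file without guarding against read errors. A minimal sketch of the corrected consumption pattern outside Gradio (the helper name and the sample payload here are illustrative, not part of the commit):

    # Sketch: consume a _worker_transcribe-style result dict.
    # The keys mirror those in the diff above; the payload below is made up.
    def read_worker_result(result):
        """Return (transcript_text, log) from a worker result dict."""
        content = ""
        text_path = result.get("text_path")
        if text_path:
            try:
                with open(text_path, "r", encoding="utf-8", errors="replace") as fh:
                    content = fh.read()
            except Exception:
                content = ""  # same fallback as the fixed handler: empty transcript on read failure
        return content, result.get("log", "")

    # A failed run carries no text_path; only the log survives.
    print(read_worker_result({"file": "a.mp3", "text_path": None, "srt_path": None, "log": "Conversion failed"}))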
 