Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks committed on Nov 21, 2025

Commit

b7cd478

verified ·

1 Parent(s): 68f2294

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -1243

app.py CHANGED Viewed

@@ -1,1116 +1,3 @@
-Skip to content
-Chat history
-You said:
-# app.py
-# Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
-# Clean, single-version file for Hugging Face Spaces.
-import os
-import sys
-import json
-import shutil
-import tempfile
-import subprocess
-import traceback
-import threading
-import re
-from difflib import get_close_matches
-# Force unbuffered output so container logs show prints immediately
-os.environ["PYTHONUNBUFFERED"] = "1"
-print("DEBUG: app.py bootstrap starting", flush=True)
-# Third-party imports (must be installed in the environment)
-try:
-    from docx import Document
-    import whisper
-    import gradio as gr
-    import pyzipper
-    from pydub import AudioSegment
-except Exception as e:
-    print("FATAL: import error for third-party libs:", e, flush=True)
-    traceback.print_exc()
-    raise
-print("DEBUG: imports OK", flush=True)
-# ---------- Config ----------
-MEMORY_FILE = "memory.json"
-MEMORY_LOCK = threading.Lock()
-MIN_WAV_SIZE = 200  # bytes
-# Small ffmpeg fallback grid (hybrid conversion)
-FFMPEG_CANDIDATES = [
-    ("s16le", 16000, 1),
-    ("s16le", 44100, 2),
-    ("pcm_s16le", 16000, 1),
-    ("pcm_s16le", 44100, 2),
-    ("mulaw", 8000, 1),
-]
-# ----------------------------
-# ---------- Memory helpers ----------
-def load_memory():
-    try:
-        if os.path.exists(MEMORY_FILE):
-            with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
-                return json.load(fh)
-    except Exception:
-        pass
-    mem = {"words": {}, "phrases": {}}
-    try:
-        with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
-            json.dump(mem, fh, ensure_ascii=False, indent=2)
-    except Exception:
-        pass
-    return mem
-def save_memory(mem):
-    with MEMORY_LOCK:
-        with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
-            json.dump(mem, fh, ensure_ascii=False, indent=2)
-memory = load_memory()
-print(
-    "DEBUG: memory loaded (words=%d phrases=%d)"
-    % (len(memory.get("words", {})), len(memory.get("phrases", {}))),
-    flush=True,
-)
-# ---------- Postprocessing ----------
-MEDICAL_ABBREVIATIONS = {
-    "pt": "patient",
-    "dx": "diagnosis",
-    "hx": "history",
-    "sx": "symptoms",
-    "c/o": "complains of",
-    "bp": "blood pressure",
-    "hr": "heart rate",
-    "o2": "oxygen",
-    "r/o": "rule out",
-    "adm": "admit",
-    "disch": "discharge",
-}
-DRUG_NORMALIZATION = {
-    "metformin": "Metformin",
-    "aspirin": "Aspirin",
-    "amoxicillin": "Amoxicillin",
-}
-def expand_abbreviations(text):
-    tokens = re.split(r"(\s+)", text)
-    out = []
-    for t in tokens:
-        key = t.lower().strip(".,;:")
-        if key in MEDICAL_ABBREVIATIONS:
-            trailing = ""
-            m = re.match(r"([A-Za-z0-9/]+)([.,;:]*)", t)
-            if m:
-                trailing = m.group(2) or ""
-            out.append(MEDICAL_ABBREVIATIONS[key] + trailing)
-        else:
-            out.append(t)
-    return "".join(out)
-def normalize_drugs(text):
-    for k, v in DRUG_NORMALIZATION.items():
-        text = re.sub(rf"\b{k}\b", v, text, flags=re.IGNORECASE)
-    return text
-def punctuation_and_capitalization(text):
-    text = text.strip()
-    if not text:
-        return text
-    if not re.search(r"[.?!]\s*$", text):
-        text = text.rstrip() + "."
-    parts = re.split(r"([.?!]\s+)", text)
-    out = []
-    for p in parts:
-        if p and not re.match(r"[.?!]\s+", p):
-            out.append(p.capitalize())
-        else:
-            out.append(p)
-    return "".join(out)
-def postprocess_transcript(text, format_soap=False):
-    if not text:
-        return text
-    t = re.sub(r"\s+", " ", text).strip()
-    t = expand_abbreviations(t)
-    t = normalize_drugs(t)
-    t = punctuation_and_capitalization(t)
-    if format_soap:
-        sentences = re.split(r"(?<=[.?!])\s+", t)
-        subj = sentences[0] if len(sentences) >= 1 else ""
-        obj = sentences[1] if len(sentences) >= 2 else ""
-        assessment = ""
-        for kw in ["diagnosis", "dx", "rule out", "r/o", "probable"]:
-            if kw in t.lower():
-                assessment = "Assessment: " + subj
-                break
-        soap = (
-            f"S: {subj}\nO: {obj}\nA: {assessment}\nP: Plan: follow up as indicated."
-        )
-        return soap
-    return t
-# ---------- Memory utilities ----------
-def extract_words_and_phrases(text):
-    # basic tokenization for words; phrases = sentences
-    words = re.findall(r"[A-Za-z0-9\-']+", text)
-    sentences = [
-        s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()
-    ]
-    return [w for w in words if w.strip()], sentences
-def update_memory_with_transcript(transcript):
-    global memory
-    words, sentences = extract_words_and_phrases(transcript)
-    changed = False
-    with MEMORY_LOCK:
-        for w in words:
-            lw = w.lower()
-            if lw in memory["words"]:
-                memory["words"][lw] += 1
-            else:
-                memory["words"][lw] = 1
-                changed = True
-        for s in sentences:
-            key = s.strip()
-            if key in memory["phrases"]:
-                memory["phrases"][key] += 1
-            else:
-                memory["phrases"][key] = 1
-                changed = True
-        if changed:
-            try:
-                with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
-                    json.dump(memory, fh, ensure_ascii=False, indent=2)
-            except Exception:
-                pass
-def memory_correct_text(text, min_ratio=0.85):
-    if not text or (not memory.get("words") and not memory.get("phrases")):
-        return text
-    def fix_word(w):
-        lw = w.lower()
-        if lw in memory["words"]:
-            return w
-        candidates = get_close_matches(
-            lw, memory["words"].keys(), n=1, cutoff=min_ratio
-        )
-        if candidates:
-            cand = candidates[0]
-            if w and w[0].isupper():
-                return cand.capitalize()
-            return cand
-        return w
-    tokens = re.split(r"(\W+)", text)
-    corrected_tokens = []
-    for tok in tokens:
-        if re.match(r"^[A-Za-z0-9\-']+$", tok):
-            corrected_tokens.append(fix_word(tok))
-        else:
-            corrected_tokens.append(tok)
-    corrected = "".join(corrected_tokens)
-    for phrase in sorted(memory.get("phrases", {}).keys(), key=lambda s: -len(s)):
-        low_phrase = phrase.lower()
-        if len(low_phrase) < 8:
-            continue
-        if low_phrase in corrected.lower():
-            corrected = re.sub(
-                re.escape(phrase), phrase, corrected, flags=re.IGNORECASE
-            )
-    return corrected
-# ---------- File utilities ----------
-def save_as_word(text, filename=None):
-    if filename is None:
-        filename = os.path.join(
-            tempfile.gettempdir(), "merged_transcripts.docx"
-        )
-    doc = Document()
-    doc.add_paragraph(text)
-    doc.save(filename)
-    return filename
-# ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
-def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
-    cmd = [
-        "ffmpeg",
-        "-hide_banner",
-        "-loglevel",
-        "error",
-        "-y",
-        "-f",
-        fmt,
-        "-ar",
-        str(sr),
-        "-ac",
-        str(ch),
-        "-i",
-        input_path,
-        out_path,
-    ]
-    try:
-        proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
-        if (
-            proc.returncode == 0
-            and os.path.exists(out_path)
-            and os.path.getsize(out_path) > MIN_WAV_SIZE
-        ):
-            return True, proc.stderr + proc.stdout
-        else:
-            try:
-                if os.path.exists(out_path):
-                    os.unlink(out_path)
-            except Exception:
-                pass
-            return False, proc.stderr + proc.stdout
-    except Exception as e:
-        try:
-            if os.path.exists(out_path):
-                os.unlink(out_path)
-        except Exception:
-            pass
-        return False, str(e)
-def convert_to_wav_if_needed(input_path):
-    input_path = str(input_path)
-    lower = input_path.lower()
-    if lower.endswith(".wav"):
-        return input_path
-    auto_err = ""
-    tmp = None
-    try:
-        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        tmp.close()
-        AudioSegment.from_file(input_path).export(tmp.name, format="wav")
-        if os.path.exists(tmp.name) and os.path.getsize(tmp.name) > MIN_WAV_SIZE:
-            return tmp.name
-        else:
-            try:
-                os.unlink(tmp.name)
-            except Exception:
-                pass
-    except Exception:
-        auto_err = traceback.format_exc()
-        try:
-            if tmp and os.path.exists(tmp.name):
-                os.unlink(tmp.name)
-        except Exception:
-            pass
-    # ffmpeg fallback
-    diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
-    diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
-    diagnostics = []
-    for fmt, sr, ch in FFMPEG_CANDIDATES:
-        out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        out_wav.close()
-        success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
-        diagnostics.append(
-            f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n"
-        )
-        if success:
-            try:
-                with open(diag_log, "w", encoding="utf-8") as fh:
-                    fh.write("pydub auto error:\n")
-                    fh.write(auto_err + "\n\n")
-                    fh.write("Successful ffmpeg candidate:\n")
-                    fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
-                    fh.write("Diagnostics:\n")
-                    fh.write("\n".join(diagnostics))
-            except Exception:
-                pass
-            return out_wav.name
-        else:
-            try:
-                if os.path.exists(out_wav.name):
-                    os.unlink(out_wav.name)
-            except Exception:
-                pass
-    # final diagnostics
-    try:
-        fp = subprocess.run(
-            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-        diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
-    except Exception as e:
-        diagnostics.append("ffprobe failed: " + str(e))
-    try:
-        with open(input_path, "rb") as fh:
-            head = fh.read(512)
-            diagnostics.append("HEX PREVIEW:\n" + head.hex())
-    except Exception as e:
-        diagnostics.append("could not read head: " + str(e))
-    try:
-        with open(diag_log, "w", encoding="utf-8") as fh:
-            fh.write("pydub auto error:\n")
-            fh.write(auto_err + "\n\n")
-            fh.write("Full diagnostics:\n\n")
-            fh.write("\n\n".join(diagnostics))
-    except Exception as e:
-        raise Exception(f"Conversion failed; diagnostics write error: {e}")
-    raise Exception(
-        f"Could not convert file to WAV. Diagnostics saved to: {diag_log}"
-    )
-# ---------- Whisper model cache ----------
-MODEL_CACHE = {}
-def get_whisper_model(name):
-    if name not in MODEL_CACHE:
-        print(f"DEBUG: loading whisper model '{name}'", flush=True)
-        MODEL_CACHE[name] = whisper.load_model(name)
-    return MODEL_CACHE[name]
-# ---------- Main transcription generator ----------
-def transcribe_multiple(
-    audio_files,
-    model_name,
-    advanced_options,
-    merge_checkbox,
-    zip_file=None,
-    zip_password=None,
-    enable_memory=False,
-):
-    """
-    Generator yields (log_text, transcripts_text, merged_file_path_or_None, percent_int)
-    """
-    log = []
-    transcripts = []
-    word_file_path = None
-    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
-    extracted_audio_paths = []
-    # initial yield
-    yield "", "", None, 0
-    # cleanup previous temp dir
-    if os.path.exists(temp_extract_dir):
-        try:
-            shutil.rmtree(temp_extract_dir)
-            log.append(f"Cleaned previous temp dir: {temp_extract_dir}")
-        except Exception:
-            pass
-    # handle zip
-    if zip_file:
-        log.append(f"Processing zip: {zip_file}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
-        try:
-            os.makedirs(temp_extract_dir, exist_ok=True)
-            with pyzipper.ZipFile(zip_file, "r") as zf:
-                if zip_password:
-                    try:
-                        zf.setpassword(zip_password.encode())
-                    except Exception:
-                        log.append("Incorrect zip password")
-                        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                        return
-                exts = [
-                    ".mp3",
-                    ".wav",
-                    ".aac",
-                    ".flac",
-                    ".ogg",
-                    ".m4a",
-                    ".dat",
-                    ".dct",
-                ]
-                count = 0
-                for info in zf.infolist():
-                    if info.is_dir():
-                        continue
-                    _, ext = os.path.splitext(info.filename)
-                    if ext.lower() in exts:
-                        try:
-                            zf.extract(info, path=temp_extract_dir)
-                        except Exception as e:
-                            log.append(f"Error extracting {info.filename}: {e}")
-                            continue
-                        p = os.path.normpath(
-                            os.path.join(temp_extract_dir, info.filename)
-                        )
-                        if os.path.exists(p):
-                            extracted_audio_paths.append(p)
-                            count += 1
-                            log.append(f"Extracted: {info.filename}")
-                if count == 0:
-                    log.append("No supported audio in zip.")
-                    try:
-                        shutil.rmtree(temp_extract_dir)
-                    except Exception:
-                        pass
-                    yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                    return
-        except pyzipper.BadZipFile:
-            log.append("Invalid zip file.")
-            try:
-                shutil.rmtree(temp_extract_dir)
-            except Exception:
-                pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-            return
-        except Exception as e:
-            log.append(f"Zip processing error: {e}")
-            try:
-                shutil.rmtree(temp_extract_dir)
-            except Exception:
-                pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-            return
-    # collect audio file paths
-    paths = []
-    if extracted_audio_paths:
-        paths.extend(extracted_audio_paths)
-    if audio_files:
-        if isinstance(audio_files, (list, tuple)):
-            for a in audio_files:
-                if a:
-                    paths.append(a)
-        elif isinstance(audio_files, str):
-            paths.append(audio_files)
-    if not paths:
-        log.append("No audio files provided.")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-        return
-    # load model
-    yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
-    try:
-        model = get_whisper_model(model_name)
-        log.append(f"Loaded Whisper model: {model_name}")
-    except Exception as e:
-        log.append(f"Failed to load model {model_name}: {e}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-        return
-    total = len(paths)
-    idx = 0
-    for p in paths:
-        idx += 1
-        log.append(f"Processing file ({idx}/{total}): {p}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-            5 + (idx - 1) * 80 / max(1, total)
-        )
-        wav = None
-        try:
-            wav = convert_to_wav_if_needed(p)
-            log.append(f"Converted to WAV: {wav}")
-        except Exception as e:
-            log.append(f"Conversion failed for {p}: {e}")
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}"
-            )
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                5 + idx * 80 / max(1, total)
-            )
-            continue
-        try:
-            whisper_opts = {}
-            if isinstance(advanced_options, dict):
-                whisper_opts.update(advanced_options)
-            result = model.transcribe(wav, **whisper_opts)
-            text = result.get("text", "").strip()
-            log.append(f"Transcribed: {len(text)} chars")
-            if enable_memory:
-                text = memory_correct_text(text)
-            text = postprocess_transcript(text)
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\n{text}\n"
-            )
-            if enable_memory:
-                try:
-                    update_memory_with_transcript(text)
-                    log.append("Memory updated.")
-                except Exception:
-                    pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                10 + idx * 85 / max(1, total)
-            )
-        except Exception as e:
-            log.append(f"Transcription failed for {p}: {e}")
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}"
-            )
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                10 + idx * 85 / max(1, total)
-            )
-            continue
-        finally:
-            try:
-                if wav and os.path.exists(wav):
-                    tmpdir = tempfile.gettempdir()
-                    if (
-                        os.path.commonpath([tmpdir, os.path.abspath(wav)])
-                        == tmpdir
-                        and not p.lower().endswith(".wav")
-                    ):
-                        os.unlink(wav)
-            except Exception:
-                pass
-    # final merge option
-    if merge_checkbox:
-        try:
-            merged_text = "\n\n".join(transcripts)
-            word_file_path = save_as_word(merged_text)
-            log.append(f"Merged transcript saved: {word_file_path}")
-        except Exception as e:
-            log.append(f"Failed to save merged file: {e}")
-            word_file_path = None
-    # final yield
-    yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
-    # cleanup extracted dir
-    try:
-        if os.path.exists(temp_extract_dir):
-            shutil.rmtree(temp_extract_dir)
-            log.append("Cleaned temporary extraction dir.")
-    except Exception:
-        pass
-# ----------------------- Gradio wrapper (streaming) -----------------------
-def run_transcription_wrapper(
-    files,
-    model_name,
-    merge,
-    zip_file,
-    zip_password,
-    use_default_zip_pass,
-    default_zip_password,
-    enable_memory,
-    advanced_options_state,
-):
-    """
-    Gradio callback that streams results from transcribe_multiple.
-    This MUST be a generator (use yield) so Gradio receives 4 outputs,
-    not a single generator object.
-    """
-    try:
-        audio_input = files
-        # Normalize zip path
-        zip_path = None
-        if zip_file:
-            if isinstance(zip_file, (str, os.PathLike)):
-                zip_path = str(zip_file)
-            elif hasattr(zip_file, "name"):
-                zip_path = zip_file.name
-            elif isinstance(zip_file, dict) and zip_file.get("name"):
-                zip_path = zip_file["name"]
-        # Decide final zip password
-        if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
-            final_zip_password = default_zip_password
-        else:
-            final_zip_password = zip_password
-        adv = {}  # placeholder for future advanced options
-        # Stream results from the core generator
-        for logs, transcripts, word_path, percent in transcribe_multiple(
-            audio_input,
-            model_name,
-            adv,
-            merge_checkbox=merge,
-            zip_file=zip_path,
-            zip_password=final_zip_password,
-            enable_memory=enable_memory,
-        ):
-            yield logs, transcripts, word_path, percent
-    except Exception:
-        tb = traceback.format_exc()
-        logs = f"EXCEPTION in run_transcription_wrapper:\n{tb}"
-        transcripts = "ERROR: transcription did not start or failed unexpectedly."
-        yield logs, transcripts, None, 100
-print("DEBUG: building Gradio Blocks", flush=True)
-# ---------- Minimal, easy-to-use UI ----------
-with gr.Blocks(title="Whisper Transcriber") as demo:
-    gr.Markdown(
-        "## Whisper Transcriber\n"
-        "Upload audio files or a ZIP on the left and click **Transcribe**.\n"
-        "Transcript, progress, download, and logs appear on the right."
-    )
-    with gr.Row():
-        # LEFT: Inputs (small boxes, simple controls)
-        with gr.Column(scale=1):
-            gr.Markdown("### Input")
-            file_input = gr.File(
-                label="Audio files",
-                file_count="multiple",
-                type="filepath",
-                height=60,  # small box
-            )
-            zip_input = gr.File(
-                label="ZIP with audio (optional)",
-                file_count="single",
-                type="filepath",
-                height=60,  # small box
-            )
-            use_default_zip_pass = gr.Checkbox(
-                label="Use default ZIP password",
-                value=False,
-            )
-            default_zip_password = gr.Textbox(
-                label="Default ZIP password",
-                value="dietcoke1",  # you can change this
-                interactive=True,
-            )
-            zip_password = gr.Textbox(
-                label="ZIP password (override)",
-                placeholder="If empty, default password will be used",
-            )
-            model_select = gr.Dropdown(
-                choices=["small", "medium", "large", "base"],
-                value="small",
-                label="Whisper model",
-            )
-            merge_checkbox = gr.Checkbox(
-                label="Merge all transcripts into one .docx",
-                value=True,
-            )
-            memory_checkbox = gr.Checkbox(
-                label="Enable correction memory",
-                value=False,
-            )
-            submit = gr.Button("Transcribe", variant="primary")
-        # RIGHT: Outputs (Transcript → Progress → Download → Logs)
-        with gr.Column(scale=1):
-            gr.Markdown("### Output")
-            transcripts_out = gr.Textbox(
-                label="Transcript",
-                lines=18,
-                interactive=False,
-            )
-            progress_num = gr.Slider(
-                minimum=0,
-                maximum=100,
-                value=0,
-                step=1,
-                label="Progress (%)",
-                interactive=False,
-            )
-            download_file = gr.File(
-                label="Merged .docx (when available)"
-            )
-            logs = gr.Textbox(
-                label="Logs",
-                lines=10,
-                interactive=False,
-            )
-    submit.click(
-        fn=run_transcription_wrapper,
-        inputs=[
-            file_input,
-            model_select,
-            merge_checkbox,
-            zip_input,
-            zip_password,
-            use_default_zip_pass,
-            default_zip_password,
-            memory_checkbox,
-            gr.State({}),
-        ],
-        # The function yields: logs, transcripts, download_file, progress
-        outputs=[logs, transcripts_out, download_file, progress_num],
-    )
-# ---------- Launch ----------
-if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))
-    print("DEBUG: launching Gradio on port", port, flush=True)
-    try:
-        demo.queue().launch(server_name="0.0.0.0", server_port=port)
-    except Exception as e:
-        print("FATAL: demo.launch failed:", e, flush=True)
-        traceback.print_exc()
-        raise
-ChatGPT said:
-Thought for 7s
-Nice, thanks — this is a solid, well-structured transcription app. I ran a careful code review and made a few targeted fixes and improvements you can drop into your file to make it more robust and reliable (especially around ffmpeg fallback conversion, ZIP password handling, temp-file cleanup, and a couple of small safety/usability items).
-Below I list the important issues I found, why they matter, and then provide the concrete patched code snippets you can replace in your file. I kept changes minimal and backward-compatible.
-Major issues & fixes (summary)
-ffmpeg conversion command was fragile
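-You used -f <fmt> before -i, which tells ffmpeg to treat the input as raw audio in that format (s16le/pcm), and that often fails for containerized inputs. Better: call ffmpeg -i input -ar <sr> -ac <ch> out.wav (let ffmpeg auto-detect input). If you still want raw-format attempts, do them explicitly only when necessary. The patch forces -f only when fmt is one of the raw formats (s16le, pcm_s16le, mulaw); for any other value it lets ffmpeg autodetect the input and sets only the output sample rate and channel count. Note that every entry currently in FFMPEG_CANDIDATES is one of those raw formats, so the autodetect path is only taken once a non-raw candidate is added to the list.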
-Diagnostics/logging & cleanup
-If conversion fails we should remove partial files and write diagnostics safely. I preserved your diagnostics but improved removal and error messages.
-ZIP password handling with pyzipper
-zf.setpassword(...) simply sets a default password — it does not throw on set. Extraction will raise if the password is wrong. I changed the code to try the extract and catch RuntimeError/pyzipper.BadZipFile where appropriate and produce a clearer "Wrong password" message.
-Temporary WAV deletion logic
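-The logic that deletes the intermediate WAV relied on checking os.path.commonpath([tmpdir, os.path.abspath(wav)]) == tmpdir — that may raise ValueError if the paths are on different drives. Your original already swallows that inside a try/except, so the failure was silent and simply left the temporary WAV behind; I hardened the check itself and kept it wrapped in try/except.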
-Model loading
-I kept your model cache but suggested an optional device override if you want to put it on CPU/GPU. (Small note: confirm whisper.load_model(name) is the right API for the whisper package/version you are using.)
-Minor UI suggestion
-Consider setting file_input's type="file" or keeping "filepath" with a fallback handler — your run_transcription_wrapper already copes with zip_file being file-like or path-like, so it's fine. (Left unchanged.)
-Replace these functions with the patched versions
-Paste these function definitions into your file to replace the originals for _ffmpeg_convert, convert_to_wav_if_needed, and the ZIP extraction block in transcribe_multiple. I preserved your variable names and diagnostics as much as possible.
-# ---------- improved ffmpeg convert ----------
-def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
-    """
-    Use ffmpeg to convert input_path -> out_path.
-    We let ffmpeg autodetect input format (so don't force -f except if fmt == 'raw').
-    If fmt is one of the raw formats (s16le/pcm_s16le/mulaw) we treat it as raw input;
-    otherwise we let ffmpeg autodetect and only set sample rate/channels on output.
-    """
-    try:
-        cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
-        # if fmt indicates raw input format, we need to tell ffmpeg about it
-        if fmt in ("s16le", "pcm_s16le", "mulaw"):
-            # raw PCM input requires -f and input format description before -i
-            cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path]
-            # output will be WAV (ffmpeg determines container from out_path)
-            cmd += [out_path]
-        else:
-            # For most candidate attempts, let ffmpeg autodetect input format.
-            # Request output sample rate/channels explicitly.
-            cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
-        proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
-        stdout_stderr = proc.stdout + proc.stderr
-        if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
-            return True, stdout_stderr
-        else:
-            # remove partial output if exists
-            try:
-                if os.path.exists(out_path):
-                    os.unlink(out_path)
-            except Exception:
-                pass
-            return False, stdout_stderr
-    except Exception as e:
-        try:
-            if os.path.exists(out_path):
-                os.unlink(out_path)
-        except Exception:
-            pass
-        return False, str(e)
-def convert_to_wav_if_needed(input_path):
-    input_path = str(input_path)
-    lower = input_path.lower()
-    if lower.endswith(".wav"):
-        return input_path
-    auto_err = ""
-    tmp = None
-    try:
-        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        tmp.close()
-        # let pydub autodetect format for most files
-        AudioSegment.from_file(input_path).export(tmp.name, format="wav")
-        if os.path.exists(tmp.name) and os.path.getsize(tmp.name) > MIN_WAV_SIZE:
-            return tmp.name
-        else:
-            # bad export, remove and fall through to ffmpeg tries
-            try:
-                os.unlink(tmp.name)
-            except Exception:
-                pass
-    except Exception:
-        auto_err = traceback.format_exc()
-        try:
-            if tmp and os.path.exists(tmp.name):
-                os.unlink(tmp.name)
-        except Exception:
-            pass
-    # ffmpeg fallback
-    diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
-    diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
-    diagnostics = []
-    for fmt, sr, ch in FFMPEG_CANDIDATES:
-        out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-        out_wav.close()
-        success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
-        diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
-        if success:
-            try:
-                with open(diag_log, "w", encoding="utf-8") as fh:
-                    fh.write("pydub auto error:\n")
-                    fh.write(auto_err + "\n\n")
-                    fh.write("Successful ffmpeg candidate:\n")
-                    fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
-                    fh.write("Diagnostics:\n")
-                    fh.write("\n".join(diagnostics))
-            except Exception:
-                pass
-            return out_wav.name
-        else:
-            try:
-                if os.path.exists(out_wav.name):
-                    os.unlink(out_wav.name)
-            except Exception:
-                pass
-    # final diagnostics: ffprobe + file head
-    try:
-        fp = subprocess.run(
-            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-        diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
-    except Exception as e:
-        diagnostics.append("ffprobe failed: " + str(e))
-    try:
-        with open(input_path, "rb") as fh:
-            head = fh.read(512)
-            diagnostics.append("HEX PREVIEW:\n" + head.hex())
-    except Exception as e:
-        diagnostics.append("could not read head: " + str(e))
-    try:
-        with open(diag_log, "w", encoding="utf-8") as fh:
-            fh.write("pydub auto error:\n")
-            fh.write(auto_err + "\n\n")
-            fh.write("Full diagnostics:\n\n")
-            fh.write("\n\n".join(diagnostics))
-    except Exception as e:
-        # keep original behavior: surface a readable exception
-        raise Exception(f"Conversion failed; diagnostics write error: {e}")
-    raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
-Replace the ZIP extraction block inside transcribe_multiple with this improved version
-Find this earlier in transcribe_multiple:
-    if zip_file:
-        log.append(f"Processing zip: {zip_file}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
-        try:
-            os.makedirs(temp_extract_dir, exist_ok=True)
-            with pyzipper.ZipFile(zip_file, "r") as zf:
-                if zip_password:
-                    try:
-                        zf.setpassword(zip_password.encode())
-                    except Exception:
-                        log.append("Incorrect zip password")
-                        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                        return
-                exts = [...]
-                ...
-Replace the whole if zip_file: block with this:
-    if zip_file:
-        log.append(f"Processing zip: {zip_file}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
-        try:
-            os.makedirs(temp_extract_dir, exist_ok=True)
-            with pyzipper.ZipFile(zip_file, "r") as zf:
-                # set password if provided (pyzipper accepts bytes)
-                if zip_password:
-                    try:
-                        zf.setpassword(zip_password.encode())
-                    except Exception:
-                        # setpassword typically doesn't fail, but just in case
-                        log.append("Failed to set zip password (unexpected).")
-                exts = [
-                    ".mp3",
-                    ".wav",
-                    ".aac",
-                    ".flac",
-                    ".ogg",
-                    ".m4a",
-                    ".dat",
-                    ".dct",
-                ]
-                count = 0
-                for info in zf.infolist():
-                    if info.is_dir():
-                        continue
-                    _, ext = os.path.splitext(info.filename)
-                    if ext.lower() in exts:
-                        try:
-                            # attempt to extract each file; if wrong password, extraction will raise
-                            zf.extract(info, path=temp_extract_dir)
-                        except RuntimeError as e:
-                            # pyzipper may raise RuntimeError on bad password
-                            log.append(f"Password required or incorrect for {info.filename}: {e}")
-                            continue
-                        except pyzipper.BadZipFile:
-                            log.append(f"Bad zip entry: {info.filename}")
-                            continue
-                        except Exception as e:
-                            log.append(f"Error extracting {info.filename}: {e}")
-                            continue
-                        p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
-                        if os.path.exists(p):
-                            extracted_audio_paths.append(p)
-                            count += 1
-                            log.append(f"Extracted: {info.filename}")
-                if count == 0:
-                    log.append("No supported audio in zip.")
-                    try:
-                        shutil.rmtree(temp_extract_dir)
-                    except Exception:
-                        pass
-                    yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                    return
-        except pyzipper.BadZipFile:
-            log.append("Invalid zip file.")
-            try:
-                shutil.rmtree(temp_extract_dir)
-            except Exception:
-                pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-            return
-        except Exception as e:
-            log.append(f"Zip processing error: {e}")
-            try:
-                shutil.rmtree(temp_extract_dir)
-            except Exception:
-                pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-            return
-Small hardening for temporary WAV deletion (replace your existing finally block)
-Replace the finally that deletes intermediate WAVs with this safer variant:
-            finally:
-                try:
-                    if wav and os.path.exists(wav):
-                        tmpdir = tempfile.gettempdir()
-                        try:
-                            # Do not remove original .wav files that came from user (only remove conversion outputs)
-                            common = os.path.commonpath([os.path.abspath(tmpdir), os.path.abspath(wav)])
-                            if common == os.path.abspath(tmpdir) and not p.lower().endswith(".wav"):
-                                os.unlink(wav)
-                        except Exception:
-                            # fallback: only remove if it's in tempdir substring
-                            try:
-                                if tmpdir in os.path.abspath(wav) and not p.lower().endswith(".wav"):
-                                    os.unlink(wav)
-                            except Exception:
-                                pass
-                except Exception:
-                    pass
-Optional suggestions (non-blocking)
-Consider making MIN_WAV_SIZE a bit larger (e.g., 1024 bytes) — a 200-byte file is tiny and may be a partial or invalid export.
-Consider exposing a device option for the Whisper model (e.g. cpu vs cuda) and pass that into whisper.load_model.
-default_zip_password is in your UI — don't keep real secrets there; make default generic or blank.
-If you want more precise progress reporting, you could measure the duration of each model transcribe call and update the percentage accordingly.
-If you run on Hugging Face Spaces, ensure the whisper package you installed matches your API expectations (some forks differ).
-Why these changes help (TL;DR)
-Letting ffmpeg autodetect input prevents many conversion failures on real-world audio files (mp3, m4a, flac, etc.).
-Proper diagnostic capture + safer temp-file cleanup prevents stuck files and makes debugging easier.
-More robust zip/password extraction reduces silent failures and makes user feedback clearer.
-If you want, I can:
-produce a full patched app.py with the above replacements merged (ready to paste), or
-convert model loading to allow mobile/CPU-only fallback and add a small "device" dropdown in the UI, or
-update the UI to add a "select which extracted files to transcribe" step (you had requested that in earlier project notes).
-Tell me which of those you want and I’ll give the full code in one paste.
-You said:
-full code with an option to memorize old words
-ChatGPT said:
-Thought for 5s
-Nice — below is a full, ready-to-paste app.py that:
-Includes the improved ffmpeg fallback conversion and safer temp cleanup from my earlier suggestions.
-Improves ZIP extraction & password handling.
-Adds Memory management UI and functions so you can:
-Enable/disable the correction memory (existing checkbox).
-Import an existing memory file (JSON or newline word list).
-Add a single word or phrase to memory manually.
-Clear the memory.
-View current memory (words & phrases counts).
-Keeps the original transcription flow and UI layout; I only extended the left-side controls to include the memory operations.
-Paste this into your repo (replace your current app.py). I preserved your original structure and most variable names.
 # app.py
 # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
 # Clean, single-version file for Hugging Face Spaces.
@@ -1148,7 +35,7 @@ print("DEBUG: imports OK", flush=True)
 # ---------- Config ----------
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
-MIN_WAV_SIZE = 1024  # raised slightly from 200 for safety
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
@@ -1164,7 +51,6 @@ def load_memory():
         if os.path.exists(MEMORY_FILE):
             with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
                 data = json.load(fh)
-                # validate minimal structure
                 if not isinstance(data, dict):
                     raise ValueError("memory.json root not dict")
                 data.setdefault("words", {})
@@ -1197,7 +83,6 @@ print(
     flush=True,
 )
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
@@ -1283,11 +168,8 @@ def postprocess_transcript(text, format_soap=False):
 # ---------- Memory utilities ----------
 def extract_words_and_phrases(text):
-    # basic tokenization for words; phrases = sentences
     words = re.findall(r"[A-Za-z0-9\-']+", text)
-    sentences = [
-        s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()
-    ]
     return [w for w in words if w.strip()], sentences
@@ -1326,9 +208,7 @@ def memory_correct_text(text, min_ratio=0.85):
         lw = w.lower()
         if lw in memory["words"]:
             return w
-        candidates = get_close_matches(
-            lw, memory["words"].keys(), n=1, cutoff=min_ratio
-        )
         if candidates:
             cand = candidates[0]
             if w and w[0].isupper():
@@ -1350,24 +230,15 @@ def memory_correct_text(text, min_ratio=0.85):
         if len(low_phrase) < 8:
             continue
         if low_phrase in corrected.lower():
-            corrected = re.sub(
-                re.escape(phrase), phrase, corrected, flags=re.IGNORECASE
-            )
     return corrected
 # ---------- Memory management UI helpers ----------
 def import_memory_file(uploaded):
-    """
-    Accepts an uploaded file object or filepath. Accepts:
-     - JSON of shape {"words": {...}, "phrases": {...}}
-     - Plain newline-separated words or CSV-like lines "word,count"
-    Returns status message.
-    """
     global memory
     if not uploaded:
         return "No file provided."
     path = None
     try:
         if isinstance(uploaded, (str, os.PathLike)):
@@ -1378,17 +249,12 @@ def import_memory_file(uploaded):
             path = uploaded["name"]
         else:
             return "Unable to determine uploaded file path."
-        # read file
         with open(path, "r", encoding="utf-8") as fh:
             raw = fh.read()
-        # try JSON first
         try:
             parsed = json.loads(raw)
             if isinstance(parsed, dict):
                 with MEMORY_LOCK:
-                    # merge words/phrases
                     parsed_words = parsed.get("words", {})
                     parsed_phrases = parsed.get("phrases", {})
                     for k, v in parsed_words.items():
@@ -1398,10 +264,7 @@ def import_memory_file(uploaded):
                     save_memory(memory)
                 return f"Imported JSON memory (words={len(parsed_words)}, phrases={len(parsed_phrases)})."
         except Exception:
-            # not JSON, fallback to newline parse
             pass
-        # fallback: split lines, attempt "word,count" or just "word"
         lines = [l.strip() for l in raw.splitlines() if l.strip()]
         added_words = 0
         added_phrases = 0
@@ -1417,7 +280,6 @@ def import_memory_file(uploaded):
                     memory["words"][key] = memory["words"].get(key, 0) + cnt
                     added_words += 1
                 else:
-                    # treat as word (if short) else as phrase
                     if len(line.split()) <= 3:
                         key = line.lower()
                         memory["words"][key] = memory["words"].get(key, 0) + 1
@@ -1433,9 +295,6 @@ def import_memory_file(uploaded):
 def add_memory_entry(entry):
-    """
-    Add a single 'word' or phrase. If entry is short (<=3 words) treat as word, else phrase.
-    """
     global memory
     if not entry or not entry.strip():
         return "No entry provided."
@@ -1451,6 +310,7 @@ def add_memory_entry(entry):
             save_memory(memory)
             return f"Added/updated phrase: '{e}'."
 def clear_memory():
     global memory
     with MEMORY_LOCK:
@@ -1458,11 +318,8 @@ def clear_memory():
         save_memory(memory)
     return "Memory cleared."
 def view_memory(limit=2000):
-    """
-    Returns a text summary of memory (words sorted by count then phrases).
-    limit parameter caps returned characters for UI.
-    """
     w = memory.get("words", {})
     p = memory.get("phrases", {})
     out_lines = []
@@ -1482,9 +339,7 @@ def view_memory(limit=2000):
 # ---------- File utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
-        filename = os.path.join(
-            tempfile.gettempdir(), "merged_transcripts.docx"
-        )
     doc = Document()
     doc.add_paragraph(text)
     doc.save(filename)
@@ -1493,20 +348,12 @@ def save_as_word(text, filename=None):
 # ---------- improved ffmpeg convert ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
-    """
-    Use ffmpeg to convert input_path -> out_path.
-    Let ffmpeg autodetect input unless fmt signals raw PCM.
-    """
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
         if fmt in ("s16le", "pcm_s16le", "mulaw"):
-            # raw input: specify input format and sample params before -i
             cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path]
         else:
-            # autodetect input, request output sample rate/channels
             cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
         proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
         stdout_stderr = (proc.stdout or "") + (proc.stderr or "")
         if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
@@ -1554,7 +401,6 @@ def convert_to_wav_if_needed(input_path):
         except Exception:
             pass
-    # ffmpeg fallback
     diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
     diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
     diagnostics = []
@@ -1582,7 +428,6 @@ def convert_to_wav_if_needed(input_path):
             except Exception:
                 pass
-    # final diagnostics
     try:
         fp = subprocess.run(
             ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
@@ -1619,7 +464,6 @@ MODEL_CACHE = {}
 def get_whisper_model(name):
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
-        # You can set device by changing whisper.load_model(name, device="cpu") if needed.
         MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
@@ -1634,19 +478,14 @@ def transcribe_multiple(
     zip_password=None,
     enable_memory=False,
 ):
-    """
-    Generator yields (log_text, transcripts_text, merged_file_path_or_None, percent_int)
-    """
     log = []
     transcripts = []
     word_file_path = None
     temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
     extracted_audio_paths = []
-    # initial yield
     yield "", "", None, 0
-    # cleanup previous temp dir
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
@@ -1654,7 +493,6 @@ def transcribe_multiple(
         except Exception:
             pass
-    # handle zip
     if zip_file:
         log.append(f"Processing zip: {zip_file}")
         yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
@@ -1724,7 +562,6 @@ def transcribe_multiple(
             yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
             return
-    # collect audio file paths
     paths = []
     if extracted_audio_paths:
         paths.extend(extracted_audio_paths)
@@ -1741,7 +578,6 @@ def transcribe_multiple(
         yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
         return
-    # load model
     yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
     try:
         model = get_whisper_model(model_name)
@@ -1756,9 +592,7 @@ def transcribe_multiple(
     for p in paths:
         idx += 1
         log.append(f"Processing file ({idx}/{total}): {p}")
-        yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-            5 + (idx - 1) * 80 / max(1, total)
-        )
         wav = None
         try:
@@ -1766,12 +600,8 @@ def transcribe_multiple(
             log.append(f"Converted to WAV: {wav}")
         except Exception as e:
             log.append(f"Conversion failed for {p}: {e}")
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}"
-            )
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                5 + idx * 80 / max(1, total)
-            )
             continue
         try:
@@ -1786,9 +616,7 @@ def transcribe_multiple(
             if enable_memory:
                 text = memory_correct_text(text)
             text = postprocess_transcript(text)
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\n{text}\n"
-            )
             if enable_memory:
                 try:
@@ -1797,17 +625,11 @@ def transcribe_multiple(
                 except Exception:
                     pass
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                10 + idx * 85 / max(1, total)
-            )
         except Exception as e:
             log.append(f"Transcription failed for {p}: {e}")
-            transcripts.append(
-                f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}"
-            )
-            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(
-                10 + idx * 85 / max(1, total)
-            )
             continue
         finally:
             try:
@@ -1826,7 +648,6 @@ def transcribe_multiple(
             except Exception:
                 pass
-    # final merge option
     if merge_checkbox:
         try:
             merged_text = "\n\n".join(transcripts)
@@ -1836,10 +657,8 @@ def transcribe_multiple(
             log.append(f"Failed to save merged file: {e}")
             word_file_path = None
-    # final yield
     yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
-    # cleanup extracted dir
     try:
         if os.path.exists(temp_extract_dir):
             shutil.rmtree(temp_extract_dir)
@@ -1860,15 +679,9 @@ def run_transcription_wrapper(
     enable_memory,
     advanced_options_state,
 ):
-    """
-    Gradio callback that streams results from transcribe_multiple.
-    This MUST be a generator (use `yield`) so Gradio receives 4 outputs,
-    not a single generator object.
-    """
     try:
         audio_input = files
-        # Normalize zip path
         zip_path = None
         if zip_file:
             if isinstance(zip_file, (str, os.PathLike)):
@@ -1878,15 +691,13 @@ def run_transcription_wrapper(
             elif isinstance(zip_file, dict) and zip_file.get("name"):
                 zip_path = zip_file["name"]
-        # Decide final zip password
         if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
             final_zip_password = default_zip_password
         else:
             final_zip_password = zip_password
-        adv = {}  # placeholder for future advanced options
-        # Stream results from the core generator
         for logs, transcripts, word_path, percent in transcribe_multiple(
             audio_input,
             model_name,
@@ -1907,7 +718,6 @@ def run_transcription_wrapper(
 print("DEBUG: building Gradio Blocks", flush=True)
-# ---------- Minimal, easy-to-use UI ----------
 with gr.Blocks(title="Whisper Transcriber") as demo:
     gr.Markdown(
         "## Whisper Transcriber\n"
@@ -1916,7 +726,6 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
     )
     with gr.Row():
-        # LEFT: Inputs (small boxes, simple controls)
         with gr.Column(scale=1):
             gr.Markdown("### Input")
@@ -1924,14 +733,14 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
                 label="Audio files",
                 file_count="multiple",
                 type="filepath",
-                height=60,  # small box
             )
             zip_input = gr.File(
                 label="ZIP with audio (optional)",
                 file_count="single",
                 type="filepath",
-                height=60,  # small box
             )
             use_default_zip_pass = gr.Checkbox(
@@ -1968,7 +777,6 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
             submit = gr.Button("Transcribe", variant="primary")
-            # Memory management UI
             gr.Markdown("### Memory management")
             mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="file")
             mem_import_btn = gr.Button("Import Memory File")
@@ -1976,9 +784,8 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
             mem_add_btn = gr.Button("Add to Memory")
             mem_clear_btn = gr.Button("Clear Memory")
             mem_view_btn = gr.Button("View Memory")
-            mem_status = gr.Textbox(label="Memory status", interactive=False, lines=4)
-        # RIGHT: Outputs (Transcript → Progress → Download → Logs)
         with gr.Column(scale=1):
             gr.Markdown("### Output")
@@ -2007,7 +814,6 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
                 interactive=False,
             )
-    # Transcription click binding
     submit.click(
         fn=run_transcription_wrapper,
         inputs=[
@@ -2021,11 +827,9 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
             memory_checkbox,
             gr.State({}),
         ],
-        # The function yields: logs, transcripts, download_file, progress
         outputs=[logs, transcripts_out, download_file, progress_num],
     )
-    # Memory button bindings
     def _import_memory(uploaded):
         return import_memory_file(uploaded)
@@ -2037,7 +841,6 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
     mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
-# ---------- Launch ----------
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
     print("DEBUG: launching Gradio on port", port, flush=True)
@@ -2047,31 +850,3 @@ if __name__ == "__main__":
         print("FATAL: demo.launch failed:", e, flush=True)
         traceback.print_exc()
         raise
-Notes & usage tips
-Importable memory formats:
-JSON with { "words": {"word": count, ...}, "phrases": {"some phrase": count, ...} }
-Plain text with one word (or word,count) per line; lines longer than three words are treated as phrases.
-The "Enable correction memory" checkbox must be checked to apply memory-based corrections during transcription.
-View Memory shows the top 50 words/phrases (truncated for UI).
-Clear Memory removes all stored words & phrases (and overwrites memory.json).
-If you'd like I can:
-produce a version that also shows a small popup listing extracted files from a ZIP and lets the user choose which to transcribe (you mentioned that previously), or
-add a "device" dropdown (cpu/cuda) and pass to whisper.load_model, or
-create a minimal test script that runs conversion on a few sample audio files to validate the ffmpeg fallbacks.
-Which follow-up would you like?
-No file chosenNo file chosen
-ChatGPT can make mistakes. Check important info. See Cookie Preferences.

 # app.py
 # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
 # Clean, single-version file for Hugging Face Spaces.
 # ---------- Config ----------
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
+MIN_WAV_SIZE = 1024  # bytes
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
         if os.path.exists(MEMORY_FILE):
             with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
                 data = json.load(fh)
                 if not isinstance(data, dict):
                     raise ValueError("memory.json root not dict")
                 data.setdefault("words", {})
     flush=True,
 )
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
 # ---------- Memory utilities ----------
 def extract_words_and_phrases(text):
     words = re.findall(r"[A-Za-z0-9\-']+", text)
+    sentences = [s.strip() for s in re.split(r"(?<=[.?!])\s+", text) if s.strip()]
     return [w for w in words if w.strip()], sentences
         lw = w.lower()
         if lw in memory["words"]:
             return w
+        candidates = get_close_matches(lw, memory["words"].keys(), n=1, cutoff=min_ratio)
         if candidates:
             cand = candidates[0]
             if w and w[0].isupper():
         if len(low_phrase) < 8:
             continue
         if low_phrase in corrected.lower():
+            corrected = re.sub(re.escape(phrase), phrase, corrected, flags=re.IGNORECASE)
     return corrected
 # ---------- Memory management UI helpers ----------
 def import_memory_file(uploaded):
     global memory
     if not uploaded:
         return "No file provided."
     path = None
     try:
         if isinstance(uploaded, (str, os.PathLike)):
             path = uploaded["name"]
         else:
             return "Unable to determine uploaded file path."
         with open(path, "r", encoding="utf-8") as fh:
             raw = fh.read()
         try:
             parsed = json.loads(raw)
             if isinstance(parsed, dict):
                 with MEMORY_LOCK:
                     parsed_words = parsed.get("words", {})
                     parsed_phrases = parsed.get("phrases", {})
                     for k, v in parsed_words.items():
                     save_memory(memory)
                 return f"Imported JSON memory (words={len(parsed_words)}, phrases={len(parsed_phrases)})."
         except Exception:
             pass
         lines = [l.strip() for l in raw.splitlines() if l.strip()]
         added_words = 0
         added_phrases = 0
                     memory["words"][key] = memory["words"].get(key, 0) + cnt
                     added_words += 1
                 else:
                     if len(line.split()) <= 3:
                         key = line.lower()
                         memory["words"][key] = memory["words"].get(key, 0) + 1
 def add_memory_entry(entry):
     global memory
     if not entry or not entry.strip():
         return "No entry provided."
             save_memory(memory)
             return f"Added/updated phrase: '{e}'."
 def clear_memory():
     global memory
     with MEMORY_LOCK:
         save_memory(memory)
     return "Memory cleared."
 def view_memory(limit=2000):
     w = memory.get("words", {})
     p = memory.get("phrases", {})
     out_lines = []
 # ---------- File utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
+        filename = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
     doc = Document()
     doc.add_paragraph(text)
     doc.save(filename)
 # ---------- improved ffmpeg convert ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
     try:
         cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
         if fmt in ("s16le", "pcm_s16le", "mulaw"):
             cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path]
         else:
             cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
         proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
         stdout_stderr = (proc.stdout or "") + (proc.stderr or "")
         if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
         except Exception:
             pass
     diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
     diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
     diagnostics = []
             except Exception:
                 pass
     try:
         fp = subprocess.run(
             ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
 def get_whisper_model(name):
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
         MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
     zip_password=None,
     enable_memory=False,
 ):
     log = []
     transcripts = []
     word_file_path = None
     temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
     extracted_audio_paths = []
     yield "", "", None, 0
     if os.path.exists(temp_extract_dir):
         try:
             shutil.rmtree(temp_extract_dir)
         except Exception:
             pass
     if zip_file:
         log.append(f"Processing zip: {zip_file}")
         yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
             yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
             return
     paths = []
     if extracted_audio_paths:
         paths.extend(extracted_audio_paths)
         yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
         return
     yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
     try:
         model = get_whisper_model(model_name)
     for p in paths:
         idx += 1
         log.append(f"Processing file ({idx}/{total}): {p}")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx - 1) * 80 / max(1, total))
         wav = None
         try:
             log.append(f"Converted to WAV: {wav}")
         except Exception as e:
             log.append(f"Conversion failed for {p}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}")
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
             continue
         try:
             if enable_memory:
                 text = memory_correct_text(text)
             text = postprocess_transcript(text)
+            transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
             if enable_memory:
                 try:
                 except Exception:
                     pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
         except Exception as e:
             log.append(f"Transcription failed for {p}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}")
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
             continue
         finally:
             try:
             except Exception:
                 pass
     if merge_checkbox:
         try:
             merged_text = "\n\n".join(transcripts)
             log.append(f"Failed to save merged file: {e}")
             word_file_path = None
     yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
     try:
         if os.path.exists(temp_extract_dir):
             shutil.rmtree(temp_extract_dir)
     enable_memory,
     advanced_options_state,
 ):
     try:
         audio_input = files
         zip_path = None
         if zip_file:
             if isinstance(zip_file, (str, os.PathLike)):
             elif isinstance(zip_file, dict) and zip_file.get("name"):
                 zip_path = zip_file["name"]
         if use_default_zip_pass and (not zip_password or zip_password.strip() == ""):
             final_zip_password = default_zip_password
         else:
             final_zip_password = zip_password
+        adv = {}
         for logs, transcripts, word_path, percent in transcribe_multiple(
             audio_input,
             model_name,
 # Module-level UI construction. The flushed debug print marks the start of
 # this phase in the logs.
 print("DEBUG: building Gradio Blocks", flush=True)
 # `demo` is the Blocks app object that every component and event handler
 # below is attached to.
 with gr.Blocks(title="Whisper Transcriber") as demo:
     # Page heading, rendered as Markdown.
     gr.Markdown(
         "## Whisper Transcriber\n"
     )
     with gr.Row():
         # First column: input controls, the submit button and memory tools.
         with gr.Column(scale=1):
             gr.Markdown("### Input")
                 # Audio upload arguments: several files may be selected
                 # (file_count="multiple").
                 label="Audio files",
                 file_count="multiple",
                 type="filepath",
+                height=60,
             )
             # Optional single ZIP archive containing audio.
             zip_input = gr.File(
                 label="ZIP with audio (optional)",
                 file_count="single",
                 type="filepath",
+                height=60,
             )
             use_default_zip_pass = gr.Checkbox(
             # Primary action button; its click handler is wired up below.
             submit = gr.Button("Transcribe", variant="primary")
             gr.Markdown("### Memory management")
             # NOTE(review): this upload uses type="file" while the two gr.File
             # inputs above use type="filepath" -- confirm that the installed
             # Gradio version accepts both values and that import_memory_file
             # handles the object this type produces.
             mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="file")
             # Buttons for the memory-management actions.
             mem_import_btn = gr.Button("Import Memory File")
             mem_add_btn = gr.Button("Add to Memory")
             mem_clear_btn = gr.Button("Clear Memory")
             mem_view_btn = gr.Button("View Memory")
             # Read-only, six-line textbox; the "View Memory" handler below
             # writes its result here.
+            mem_status = gr.Textbox(label="Memory status", interactive=False, lines=6)
         # Second column: output components.
         with gr.Column(scale=1):
             gr.Markdown("### Output")
                 interactive=False,
             )
     # Clicking "Transcribe" calls run_transcription_wrapper and routes its
     # results to the four output components listed in `outputs`.
     submit.click(
         fn=run_transcription_wrapper,
         inputs=[
             memory_checkbox,
             # A State holding an empty dict is passed as the last input;
             # presumably it feeds the wrapper's `advanced_options_state`
             # parameter -- verify against the wrapper signature.
             gr.State({}),
         ],
         outputs=[logs, transcripts_out, download_file, progress_num],
     )
     # Thin adapter: forwards the uploaded value unchanged to import_memory_file.
     def _import_memory(uploaded):
         return import_memory_file(uploaded)
     # "View Memory" takes no inputs and writes view_memory()'s return value
     # into the mem_status textbox.
     mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
 # Script entry point: runs only when the file is executed directly.
 if __name__ == "__main__":
     # Port comes from the PORT environment variable, defaulting to 7860.
     port = int(os.environ.get("PORT", 7860))
     print("DEBUG: launching Gradio on port", port, flush=True)
         # Launch-failure path: log the error and full traceback (flushed so
         # they reach the logs immediately), then re-raise the exception.
         print("FATAL: demo.launch failed:", e, flush=True)
         traceback.print_exc()
         raise