Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks committed on Nov 14, 2025

Commit

41eb42e

verified ·

1 Parent(s): ef63fe4

Update app.py

Browse files

Files changed (1) hide show

app.py +420 -5

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 # app.py
 # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
-# Cleaned, debugged, and Spaces-ready.
-# Replace /app/app.py with this file and restart container.
 import os
 import sys
@@ -34,10 +33,9 @@ except Exception as e:
 print("DEBUG: imports OK", flush=True)
 # ---------- Config ----------
-MEMORY_FILE = "memory.json"   # persistent memory in repo (will be written)
 MEMORY_LOCK = threading.Lock()
 MIN_WAV_SIZE = 200            # bytes
-# Fallback ffmpeg conversion candidates (short hybrid list)
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
@@ -71,6 +69,7 @@ def save_memory(mem):
 memory = load_memory()
 print("DEBUG: memory loaded (words=%d phrases=%d)" % (len(memory.get("words", {})), len(memory.get("phrases", {}))), flush=True)
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
@@ -149,4 +148,420 @@ def postprocess_transcript(text, format_soap=False):
 # ---------- Memory utilities ----------
 def extract_words_and_phrases(text):
-    words = re.findall(r"[A-Za-z0-]()

 # app.py
 # Whisper transcription app - HYBRID conversion (pydub + small ffmpeg fallback)
+# Paste chunks 1/4 -> 2/4 -> 3/4 -> 4/4 in order into /app/app.py
 import os
 import sys
 print("DEBUG: imports OK", flush=True)
 # ---------- Config ----------
+MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
 MIN_WAV_SIZE = 200            # bytes
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
 memory = load_memory()
 print("DEBUG: memory loaded (words=%d phrases=%d)" % (len(memory.get("words", {})), len(memory.get("phrases", {}))), flush=True)
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
 # ---------- Memory utilities ----------
 def extract_words_and_phrases(text):
+    # basic tokenization for words; phrases = sentences
+    words = re.findall(r"[A-Za-z0-9\-']+", text)
+    sentences = [s.strip() for s in re.split(r'(?<=[.?!])\s+', text) if s.strip()]
+    return [w for w in words if w.strip()], sentences
def update_memory_with_transcript(transcript):
    """Fold the words and sentences of *transcript* into the shared memory.

    Every word (lower-cased) and every sentence found by
    ``extract_words_and_phrases`` has its counter in the module-level
    ``memory`` dict incremented, and the dict is then written to
    ``MEMORY_FILE``. The file is rewritten whenever any counter changed, not
    only when a new key appeared, so increments on known entries are kept.

    Args:
        transcript: Text to learn from. Empty or ``None`` input is ignored.

    Returns:
        None. Persistence is best-effort: a failed write is reported on
        stdout and the in-memory counters are kept.
    """
    if not transcript:
        return
    words, sentences = extract_words_and_phrases(transcript)
    if not words and not sentences:
        return

    with MEMORY_LOCK:
        word_counts = memory.setdefault("words", {})
        phrase_counts = memory.setdefault("phrases", {})
        for word in words:
            key = word.lower()
            word_counts[key] = word_counts.get(key, 0) + 1
        for sentence in sentences:
            key = sentence.strip()
            phrase_counts[key] = phrase_counts.get(key, 0) + 1

        # Write to a sibling file first and swap it in, so a failure halfway
        # through serialisation cannot truncate the existing memory file.
        tmp_path = MEMORY_FILE + ".tmp"
        try:
            with open(tmp_path, "w", encoding="utf-8") as fh:
                json.dump(memory, fh, ensure_ascii=False, indent=2)
            os.replace(tmp_path, MEMORY_FILE)
        except (OSError, TypeError, ValueError) as exc:
            print(f"DEBUG: could not persist memory to {MEMORY_FILE}: {exc}", flush=True)
def memory_correct_text(text, min_ratio=0.85):
    """Nudge *text* towards vocabulary and phrases stored in ``memory``.

    Two passes are applied:

    1. Each alphabetic word (letters, with inner hyphens/apostrophes kept
       together, e.g. "don't") that is not already a known word is replaced
       by its closest known word when the similarity is at least
       *min_ratio*. Tokens containing digits, underscores or non-ASCII
       letters are left untouched so numbers are never rewritten.
    2. Every stored phrase of at least 8 characters that occurs in the text
       (case-insensitively) is rewritten to the stored spelling.

    Args:
        text: Transcript to correct. Falsy input is returned unchanged.
        min_ratio: ``difflib.get_close_matches`` cutoff for word matching.

    Returns:
        The corrected text.
    """
    if not text:
        return text
    known_words = memory.get("words") or {}
    known_phrases = memory.get("phrases") or {}
    if not known_words and not known_phrases:
        return text

    vocabulary = list(known_words)

    def fix_word(match):
        word = match.group(0)
        lowered = word.lower()
        if lowered in known_words:
            return word
        candidates = get_close_matches(lowered, vocabulary, n=1, cutoff=min_ratio)
        if not candidates:
            return word
        best = candidates[0]
        # Preserve a leading capital from the original token.
        return best.capitalize() if word[0].isupper() else best

    corrected = text
    if vocabulary:
        # Letters with inner hyphens/apostrophes, not glued to other word
        # characters (digits, underscore, accented letters).
        word_pattern = r"(?<!\w)[A-Za-z]+(?:['\-][A-Za-z]+)*(?!\w)"
        corrected = re.sub(word_pattern, fix_word, corrected)

    # Longest phrases first so that shorter ones cannot pre-empt them.
    for phrase in sorted(known_phrases, key=len, reverse=True):
        low_phrase = phrase.lower()
        if len(low_phrase) < 8:
            continue
        if low_phrase in corrected.lower():
            # A callable replacement inserts the phrase literally; passing the
            # phrase as a template would interpret backslashes/group refs.
            corrected = re.sub(
                re.escape(phrase),
                lambda _m, literal=phrase: literal,
                corrected,
                flags=re.IGNORECASE,
            )
    return corrected
+# ---------- File utilities ----------
def save_as_word(text, filename=None):
    """Write *text* as a single paragraph into a .docx file.

    Args:
        text: Content of the paragraph.
        filename: Destination path. When ``None`` the file
            "merged_transcripts.docx" in the system temp directory is used.

    Returns:
        The path the document was saved to.
    """
    target = filename
    if target is None:
        target = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
    document = Document()
    document.add_paragraph(text)
    document.save(target)
    return target
+# ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
+def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
+    cmd = [
+        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
+        "-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path
+    ]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
+        if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
+            return True, proc.stderr + proc.stdout
+        else:
+            try:
+                if os.path.exists(out_path):
+                    os.unlink(out_path)
+            except Exception:
+                pass
+            return False, proc.stderr + proc.stdout
+    except Exception as e:
+        try:
+            if os.path.exists(out_path):
+                os.unlink(out_path)
+        except Exception:
+            pass
+        return False, str(e)
def _unlink_quietly(path):
    """Best-effort removal of a temporary file; a missing file is not an error."""
    try:
        if path and os.path.exists(path):
            os.unlink(path)
    except OSError:
        pass


def _write_conversion_diagnostics(diag_log, auto_err, body):
    """Write the pydub error followed by *body* to the diagnostics file."""
    with open(diag_log, "w", encoding="utf-8") as fh:
        fh.write("pydub auto error:\n")
        fh.write(auto_err + "\n\n")
        fh.write(body)


def convert_to_wav_if_needed(input_path):
    """Return a path to a WAV version of *input_path*.

    Strategy:
      1. A path ending in ".wav" (any case) is returned unchanged.
      2. pydub's automatic format detection is tried.
      3. Each ``FFMPEG_CANDIDATES`` entry is tried as a raw-audio
         interpretation via ``_ffmpeg_convert``.

    Args:
        input_path: Path (str or path-like) of the audio file.

    Returns:
        The input path itself for WAV input, otherwise the path of a newly
        created temporary ".wav" file that the caller should delete.

    Raises:
        RuntimeError: When every strategy fails. The message names a
            diagnostics file containing the pydub traceback, each ffmpeg
            attempt, ffprobe output and a hex preview of the file head.
    """
    input_path = str(input_path)
    if input_path.lower().endswith(".wav"):
        return input_path

    # --- attempt 1: pydub auto-detection ---
    auto_err = ""
    tmp_path = None
    try:
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp.close()
        tmp_path = tmp.name
        AudioSegment.from_file(input_path).export(tmp_path, format="wav")
        if os.path.exists(tmp_path) and os.path.getsize(tmp_path) > MIN_WAV_SIZE:
            return tmp_path
        _unlink_quietly(tmp_path)
    except Exception:
        # pydub can fail in many ways; keep the traceback for diagnostics
        # and fall through to the ffmpeg candidates.
        auto_err = traceback.format_exc()
        _unlink_quietly(tmp_path)

    # --- attempt 2: explicit raw-format candidates through ffmpeg ---
    diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
    diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
    diagnostics = []
    for fmt, sr, ch in FFMPEG_CANDIDATES:
        out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        out_wav.close()
        success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
        diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
        if not success:
            _unlink_quietly(out_wav.name)
            continue
        try:
            _write_conversion_diagnostics(
                diag_log,
                auto_err,
                "Successful ffmpeg candidate:\n"
                + f"fmt={fmt} sr={sr} ch={ch}\n\n"
                + "Diagnostics:\n"
                + "\n".join(diagnostics),
            )
            # Report the location; the directory name is random and would
            # otherwise be undiscoverable.
            print(f"DEBUG: ffmpeg fallback used; diagnostics: {diag_log}", flush=True)
        except OSError:
            pass  # diagnostics are optional when the conversion succeeded
        return out_wav.name

    # --- everything failed: collect as much context as possible ---
    try:
        fp = subprocess.run(
            ["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
            capture_output=True, text=True, errors="replace", timeout=10,
        )
        diagnostics.append("FFPROBE:\n" + (fp.stdout.strip() or fp.stderr.strip()))
    except Exception as e:
        diagnostics.append("ffprobe failed: " + str(e))
    try:
        with open(input_path, "rb") as fh:
            head = fh.read(512)
        diagnostics.append("HEX PREVIEW:\n" + head.hex())
    except Exception as e:
        diagnostics.append("could not read head: " + str(e))

    try:
        _write_conversion_diagnostics(
            diag_log, auto_err, "Full diagnostics:\n\n" + "\n\n".join(diagnostics)
        )
    except Exception as e:
        raise RuntimeError(f"Conversion failed; diagnostics write error: {e}") from e
    raise RuntimeError(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
+# ---------- Whisper model cache ----------
MODEL_CACHE = {}


def get_whisper_model(name):
    """Return the Whisper model called *name*, loading it on first use.

    Loaded models are kept in ``MODEL_CACHE`` so later calls with the same
    name reuse the same object.
    """
    if name in MODEL_CACHE:
        return MODEL_CACHE[name]
    print(f"DEBUG: loading whisper model '{name}'", flush=True)
    loaded = whisper.load_model(name)
    MODEL_CACHE[name] = loaded
    return loaded
+# ---------- Main transcription generator ----------
+def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbox, zip_file=None, zip_password=None, enable_memory=False):
+    log = []
+    transcripts = []
+    word_file_path = None
+    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
+    extracted_audio_paths = []
+    # initial yield
+    yield "", "", None, 0
+    # cleanup previous
+    if os.path.exists(temp_extract_dir):
+        try:
+            shutil.rmtree(temp_extract_dir)
+            log.append(f"Cleaned previous temp dir: {temp_extract_dir}")
+        except Exception:
+            pass
+    # handle zip
+    if zip_file:
+        log.append(f"Processing zip: {zip_file}")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
+        try:
+            os.makedirs(temp_extract_dir, exist_ok=True)
+            with pyzipper.ZipFile(zip_file, "r") as zf:
+                if zip_password:
+                    try:
+                        zf.setpassword(zip_password.encode())
+                    except Exception:
+                        log.append("Incorrect zip password")
+                        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+                        return
+                exts = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.m4a', '.dat', '.dct']
+                count = 0
+                for info in zf.infolist():
+                    if info.is_dir():
+                        continue
+                    _, ext = os.path.splitext(info.filename)
+                    if ext.lower() in exts:
+                        try:
+                            zf.extract(info, path=temp_extract_dir)
+                        except Exception as e:
+                            log.append(f"Error extracting {info.filename}: {e}")
+                            continue
+                        p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
+                        if os.path.exists(p):
+                            extracted_audio_paths.append(p)
+                            count += 1
+                            log.append(f"Extracted: {info.filename}")
+                if count == 0:
+                    log.append("No supported audio in zip.")
+                    try:
+                        shutil.rmtree(temp_extract_dir)
+                    except Exception:
+                        pass
+                    yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+                    return
+        except pyzipper.BadZipFile:
+            log.append("Invalid zip file.")
+            try:
+                shutil.rmtree(temp_extract_dir)
+            except Exception:
+                pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+            return
+        except Exception as e:
+            log.append(f"Zip processing error: {e}")
+            try:
+                shutil.rmtree(temp_extract_dir)
+            except Exception:
+                pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+            return
+    # collect audio file paths
+    paths = []
+    if extracted_audio_paths:
+        paths.extend(extracted_audio_paths)
+    if audio_files:
+        if isinstance(audio_files, (list, tuple)):
+            for a in audio_files:
+                if a:
+                    paths.append(a)
+        elif isinstance(audio_files, str):
+            paths.append(audio_files)
+    if not paths:
+        log.append("No audio files provided.")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+        return
+    # load model (on demand)
+    yield "\n\n".join(log), "\n\n".join(transcripts), None, 5
+    try:
+        model = get_whisper_model(model_name)
+        log.append(f"Loaded Whisper model: {model_name}")
+    except Exception as e:
+        log.append(f"Failed to load model {model_name}: {e}")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+        return
+    total = len(paths)
+    idx = 0
+    for p in paths:
+        idx += 1
+        log.append(f"Processing file ({idx}/{total}): {p}")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + (idx-1) * 80 / max(1, total))
+        wav = None
+        try:
+            wav = convert_to_wav_if_needed(p)
+            log.append(f"Converted to WAV: {wav}")
+        except Exception as e:
+            log.append(f"Conversion failed for {p}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Conversion failed: {e}")
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(5 + idx * 80 / max(1, total))
+            continue
+        try:
+            whisper_opts = {}
+            if isinstance(advanced_options, dict):
+                whisper_opts.update(advanced_options)
+            result = model.transcribe(wav, **whisper_opts)
+            text = result.get("text", "").strip()
+            log.append(f"Transcribed: {len(text)} chars")
+            if enable_memory:
+                text = memory_correct_text(text)
+            text = postprocess_transcript(text)
+            transcripts.append(f"FILE: {os.path.basename(p)}\n{text}\n")
+            if enable_memory:
+                try:
+                    update_memory_with_transcript(text)
+                    log.append("Memory updated.")
+                except Exception:
+                    pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
+        except Exception as e:
+            log.append(f"Transcription failed for {p}: {e}")
+            transcripts.append(f"FILE: {os.path.basename(p)}\nERROR: Transcription failed: {e}")
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, int(10 + idx * 85 / max(1, total))
+            continue
+        finally:
+            try:
+                if wav and os.path.exists(wav):
+                    tmpdir = tempfile.gettempdir()
+                    try:
+                        if os.path.commonpath([tmpdir, os.path.abspath(wav)]) == tmpdir and (not p.lower().endswith(".wav")):
+                            os.unlink(wav)
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+    if merge_checkbox:
+        try:
+            merged_text = "\n\n".join(transcripts)
+            word_file_path = save_as_word(merged_text)
+            log.append(f"Merged transcript saved: {word_file_path}")
+        except Exception as e:
+            log.append(f"Failed to save merged file: {e}")
+            word_file_path = None
+    yield "\n\n".join(log), "\n\n".join(transcripts), word_file_path, 100
+    try:
+        if os.path.exists(temp_extract_dir):
+            shutil.rmtree(temp_extract_dir)
+            log.append("Cleaned temporary extraction dir.")
+    except Exception:
+        pass
+# ----------------------- Gradio UI -----------------------
def run_transcription_wrapper(files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options_state):
    """Adapt the Gradio inputs to ``transcribe_multiple`` and stream its output.

    This is itself a generator function (it uses ``yield from``) so that
    the UI framework recognises the handler as streaming; a plain function
    that merely returns a generator object is not treated that way.

    Args:
        files: Uploaded audio file path(s).
        model_name: Whisper model name.
        merge: Whether to produce a merged .docx.
        zip_file: Uploaded zip as a path, an object with ``.name``, or a
            dict with a "name" key; anything else is ignored.
        zip_password: Optional zip password.
        enable_memory: Whether memory correction/learning is enabled.
        advanced_options_state: Dict of extra transcription options; a
            non-dict value is treated as no options.

    Yields:
        The ``(logs, transcripts, docx_path, progress)`` tuples produced by
        ``transcribe_multiple``.
    """
    zip_path = None
    if zip_file:
        if isinstance(zip_file, (str, os.PathLike)):
            zip_path = str(zip_file)
        elif hasattr(zip_file, "name"):
            zip_path = zip_file.name
        elif isinstance(zip_file, dict) and zip_file.get("name"):
            zip_path = zip_file["name"]

    # Forward the UI state instead of silently discarding it.
    adv = dict(advanced_options_state) if isinstance(advanced_options_state, dict) else {}

    yield from transcribe_multiple(
        files,
        model_name,
        adv,
        merge_checkbox=merge,
        zip_file=zip_path,
        zip_password=zip_password,
        enable_memory=enable_memory,
    )
# Build the Blocks layout: inputs in the left column, streamed outputs on the right.
print("DEBUG: building Gradio Blocks", flush=True)
with gr.Blocks() as demo:
    gr.Markdown("## Whisper Transcription (Spaces-ready)")
    with gr.Row():
        # Left column: uploads and options.
        with gr.Column(scale=2):
            file_input = gr.File(
                label="Upload audio files (or zip)",
                file_count="multiple",
                type="filepath",
            )
            zip_input = gr.File(
                label="Optional: Upload zip file containing audio",
                file_count="single",
                type="filepath",
            )
            zip_password = gr.Textbox(
                label="Zip password (if any)",
                placeholder="password (optional)",
            )
            model_select = gr.Dropdown(
                choices=["small", "medium", "large", "base"],
                value="small",
                label="Whisper model",
            )
            merge_checkbox = gr.Checkbox(
                label="Merge transcripts to a single .docx (downloadable)",
                value=True,
            )
            memory_checkbox = gr.Checkbox(
                label="Enable persistent memory (word/phrase correction)",
                value=False,
            )
            submit = gr.Button("Transcribe")
        # Right column: streamed results.
        with gr.Column(scale=3):
            logs = gr.Textbox(label="Logs (streaming)", lines=12)
            transcripts_out = gr.Textbox(label="Transcripts (streaming)", lines=12)
            download_file = gr.File(label="Merged .docx (when enabled)")
            progress_num = gr.Number(value=0, label="Progress (%)")

    # Empty state object passed as the handler's advanced-options argument.
    advanced_state = gr.State({})
    click_inputs = [
        file_input,
        model_select,
        merge_checkbox,
        zip_input,
        zip_password,
        memory_checkbox,
        advanced_state,
    ]
    click_outputs = [logs, transcripts_out, download_file, progress_num]
    submit.click(fn=run_transcription_wrapper, inputs=click_inputs, outputs=click_outputs)
+# Launch
if __name__ == "__main__":
    # The port comes from the PORT environment variable, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    print("DEBUG: launching Gradio on port", port, flush=True)
    launch_kwargs = {"server_name": "0.0.0.0", "server_port": port}
    try:
        demo.queue().launch(**launch_kwargs)
    except Exception as e:
        # Log the failure visibly, then re-raise so the process still fails.
        print("FATAL: demo.launch failed:", e, flush=True)
        traceback.print_exc()
        raise