Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks committed on Nov 14, 2025

Commit

d366308

verified ·

1 Parent(s): a086336

Update app.py

Browse files

Files changed (1) hide show

app.py +429 -13

app.py CHANGED Viewed

@@ -1,8 +1,428 @@
-# ----------------------- CONTINUATION / APP LAUNCH -----------------------
-# Append this to the end of your app.py (after the previous code)
-                yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                return
     # collect audio file paths from either audio_files or extracted paths
     paths = []
@@ -111,28 +531,25 @@
         pass
 # ----------------------- Gradio UI -----------------------
-def run_transcription_wrapper(files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options):
     """
-    Gradio wrapper: accepts file upload(s) and zip file (single), returns final outputs.
-    Because Gradio supports generator functions directly, we can return the generator.
     """
-    # normalize inputs
     audio_input = files
     zip_path = None
     if zip_file:
-        # gr.File will provide a dict-like or path depending on version; try to handle both
         if isinstance(zip_file, (str, os.PathLike)):
             zip_path = str(zip_file)
         elif hasattr(zip_file, "name"):
             zip_path = zip_file.name
         elif isinstance(zip_file, dict) and zip_file.get("name"):
             zip_path = zip_file["name"]
-    # advanced options not used heavily here; keep empty dict if None
-    adv = advanced_options or {}
     return transcribe_multiple(audio_input, model_name, adv, merge_checkbox=merge, zip_file=zip_path, zip_password=zip_password, enable_memory=enable_memory)
 # Build Blocks UI
-demo = gr.Blocks()
 with demo:
     gr.Markdown("## Whisper Transcription (Spaces-ready)")
@@ -151,7 +568,6 @@ with demo:
             download_file = gr.File(label="Merged .docx (when enabled)")
     # connect
-    # Gradio supports generator functions directly; the outputs are (logs, transcripts, file, progress)
     submit.click(fn=run_transcription_wrapper,
                  inputs=[file_input, model_select, merge_checkbox, zip_input, zip_password, memory_checkbox, gr.State({})],
                  outputs=[logs, transcripts_out, download_file])

+# app.py
+# Full Whisper transcription app for Hugging Face Spaces
+# - Advanced .dct conversion (ffmpeg heuristics + pydub)
+# - Zip extraction (pyzipper)
+# - Whisper transcription (cached)
+# - Live progress & logs to Gradio (generator)
+# - Persistent memory (word + phrase) with fuzzy correction
+# - Simple medical post-processing (abbrev expansion)
+# - Merge transcripts to .docx
+# - Binds to 0.0.0.0:$PORT and uses demo.queue().launch()
+import os
+import json
+import shutil
+import tempfile
+import subprocess
+import traceback
+import threading
+import re
+from difflib import get_close_matches
+from pathlib import Path
+from docx import Document
+import whisper
+import gradio as gr
+import pyzipper
+from pydub import AudioSegment
+# ---------- Config ----------
+MEMORY_FILE = "memory.json"   # persistent memory in repo (will be written)
+# Serializes writers of the memory data (taken by save_memory and update_memory_with_transcript).
+MEMORY_LOCK = threading.Lock()
+# NOTE(review): not referenced by any code visible in this diff — confirm it is still needed.
+DIAGNOSTICS_DIR_BASE = tempfile.gettempdir()
+# An ffmpeg output file must be larger than this many bytes to count as a successful conversion.
+MIN_WAV_SIZE = 200
+# ----------------------------
+# ensure memory file exists
+def load_memory():
+    """
+    Load the persistent memory dict from MEMORY_FILE.
+
+    Returns a dict that is guaranteed to contain the keys "words" and
+    "phrases", each mapping to a dict. A file that parses but is not a dict,
+    or whose sections are missing / not dicts, is repaired instead of being
+    returned as-is, because the memory utilities index memory["words"] and
+    memory["phrases"] directly.
+
+    If the file is absent, unreadable or not valid JSON, a fresh empty
+    structure is returned and (best effort) written to MEMORY_FILE.
+    """
+    try:
+        if os.path.exists(MEMORY_FILE):
+            with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
+                loaded = json.load(fh)
+            if isinstance(loaded, dict):
+                for section in ("words", "phrases"):
+                    if not isinstance(loaded.get(section), dict):
+                        loaded[section] = {}
+                return loaded
+    except Exception:
+        # Best effort: this runs at import time, so a bad file must not crash the app.
+        pass
+    # default structure
+    mem = {"words": {}, "phrases": {}}
+    try:
+        with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
+            json.dump(mem, fh, ensure_ascii=False, indent=2)
+    except Exception:
+        # A read-only filesystem is tolerated; memory then lives only in this process.
+        pass
+    return mem
+def save_memory(mem):
+    """Write *mem* to MEMORY_FILE as indented UTF-8 JSON while holding MEMORY_LOCK."""
+    with MEMORY_LOCK, open(MEMORY_FILE, "w", encoding="utf-8") as out:
+        out.write(json.dumps(mem, ensure_ascii=False, indent=2))
+# Process-wide memory dict, loaded once at import time; the memory utilities read and mutate it.
+memory = load_memory()
+# ---------- Simple medical post-processing ----------
+# Lower-case abbreviation -> expansion; expand_abbreviations() looks tokens up here.
+MEDICAL_ABBREVIATIONS = {
+    "pt": "patient",
+    "dx": "diagnosis",
+    "hx": "history",
+    "sx": "symptoms",
+    "c/o": "complains of",
+    "bp": "blood pressure",
+    "hr": "heart rate",
+    "o2": "oxygen",
+    "r/o": "rule out",
+    "adm": "admit",
+    "disch": "discharge",
+    # extend as needed
+}
+# Lower-case drug name -> canonical spelling; normalize_drugs() matches the keys case-insensitively.
+DRUG_NORMALIZATION = {
+    "metformin": "Metformin",
+    "aspirin": "Aspirin",
+    "amoxicillin": "Amoxicillin",
+}
+def expand_abbreviations(text):
+    """
+    Replace known medical abbreviations in *text* with their expansions.
+
+    The text is split on whitespace runs, which are kept as tokens so the
+    original spacing survives. Each token is stripped of the characters
+    ``.,;:`` at both ends and looked up case-insensitively in
+    MEDICAL_ABBREVIATIONS. On a hit the expansion is emitted with the
+    stripped punctuation put back on both sides; any other token passes
+    through unchanged.
+    """
+    pieces = []
+    for token in re.split(r'(\s+)', text):
+        core = token.strip(".,;:")
+        expansion = MEDICAL_ABBREVIATIONS.get(core.lower())
+        if expansion is None:
+            pieces.append(token)
+            continue
+        # Keep punctuation on BOTH sides: previously a token such as ":pt"
+        # lost its leading colon because only trailing punctuation was restored.
+        lead, _, trail = token.partition(core)
+        pieces.append(lead + expansion + trail)
+    return ''.join(pieces)
+def normalize_drugs(text):
+    """
+    Rewrite every whole-word, case-insensitive occurrence of a key of
+    DRUG_NORMALIZATION in *text* to its canonical spelling and return the result.
+    """
+    for name, canonical in DRUG_NORMALIZATION.items():
+        # Escape the key so an entry containing regex metacharacters is matched literally.
+        pattern = rf'\b{re.escape(name)}\b'
+        # A callable replacement inserts the spelling verbatim; a plain string
+        # would be parsed as a regex template (backslashes, group references).
+        text = re.sub(pattern, lambda _m, rep=canonical: rep, text, flags=re.IGNORECASE)
+    return text
+def punctuation_and_capitalization(text):
+    """
+    Ensure *text* ends with sentence punctuation and that each sentence
+    starts with an upper-case letter.
+
+    Surrounding whitespace is stripped; an empty result is returned as-is.
+    A '.' is appended when the text does not already end in '.', '?' or '!'.
+    Only the first character of each sentence is upper-cased; the rest of the
+    sentence keeps its casing.
+    """
+    text = text.strip()
+    if not text:
+        return text
+    if not re.search(r'[.?!]\s*$', text):
+        text += '.'
+    # The capturing group keeps the "punctuation + whitespace" separators in the list.
+    parts = re.split(r'([.?!]\s+)', text)
+    out = []
+    for part in parts:
+        if part and not re.match(r'[.?!]\s+', part):
+            # str.capitalize() would also lower-case the remainder, undoing
+            # normalize_drugs ("Metformin" -> "metformin") and flattening acronyms.
+            out.append(part[0].upper() + part[1:])
+        else:
+            out.append(part)
+    return ''.join(out)
+def postprocess_transcript(text, format_soap=False):
+    """
+    Clean up a raw transcript.
+
+    Whitespace runs are collapsed to single spaces, then abbreviations are
+    expanded, drug names normalized and punctuation/capitalization applied.
+    With format_soap=True a four-line S/O/A/P note is returned instead: S is
+    the first sentence, O the second (or empty), A is filled only when one of
+    the assessment keywords occurs anywhere in the cleaned text, and P is a
+    fixed follow-up line. Falsy input is returned unchanged.
+    """
+    if not text:
+        return text
+    cleaned = re.sub(r'\s+', ' ', text).strip()
+    for step in (expand_abbreviations, normalize_drugs, punctuation_and_capitalization):
+        cleaned = step(cleaned)
+    if not format_soap:
+        return cleaned
+    sentences = re.split(r'(?<=[.?!])\s+', cleaned)
+    # Pad so that missing sentences become empty strings.
+    subj, obj = (sentences + ["", ""])[:2]
+    lowered = cleaned.lower()
+    triggers = ("diagnosis", "dx", "rule out", "r/o", "probable")
+    if any(trigger in lowered for trigger in triggers):
+        assessment = "Assessment: " + subj
+    else:
+        assessment = ""
+    return "\n".join([
+        f"S: {subj}",
+        f"O: {obj}",
+        f"A: {assessment}",
+        "P: Plan: follow up as indicated.",
+    ])
+# ---------- Memory utilities (word + phrase) ----------
+def extract_words_and_phrases(text):
+    """
+    Split *text* into (words, phrases).
+
+    Words are runs of ASCII letters, digits, hyphens and apostrophes.
+    Phrases are the non-empty, stripped pieces obtained by splitting on
+    whitespace that follows '.', '?' or '!'.
+    """
+    word_list = re.findall(r"[A-Za-z0-9\-']+", text)
+    phrase_list = []
+    for chunk in re.split(r'(?<=[.?!])\s+', text):
+        chunk = chunk.strip()
+        if chunk:
+            phrase_list.append(chunk)
+    return word_list, phrase_list
+def update_memory_with_transcript(transcript):
+    """
+    Record the words and sentences of *transcript* in the global memory and
+    persist the result to MEMORY_FILE.
+
+    Words are counted lower-cased under memory["words"]; sentences are counted
+    verbatim under memory["phrases"]. The file is rewritten whenever any count
+    changed, not only when a new key appeared, so that frequency increments
+    for already-known entries are no longer lost. Write errors are ignored
+    (best effort); the in-memory counts are still updated.
+    """
+    words, sentences = extract_words_and_phrases(transcript)
+    if not words and not sentences:
+        return
+    with MEMORY_LOCK:
+        word_counts = memory.setdefault("words", {})
+        phrase_counts = memory.setdefault("phrases", {})
+        for w in words:
+            lw = w.lower()
+            word_counts[lw] = word_counts.get(lw, 0) + 1
+        for s in sentences:
+            phrase_counts[s] = phrase_counts.get(s, 0) + 1
+        # Written inline rather than via save_memory(): that helper takes
+        # MEMORY_LOCK itself and threading.Lock is not reentrant.
+        try:
+            with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
+                json.dump(memory, fh, ensure_ascii=False, indent=2)
+        except Exception:
+            pass
+def memory_correct_text(text, min_ratio=0.85):
+    """
+    Correct words/phrases in text using memory.
+    - Word-level: a token made only of ASCII letters, digits, hyphens and
+      apostrophes that is not a known memory word is replaced by its closest
+      known word (difflib.get_close_matches with cutoff=min_ratio); the
+      replacement is capitalized when the original token started upper-case.
+    - Phrase-level: every stored phrase of at least 8 characters that occurs
+      case-insensitively in the text is rewritten to its stored spelling,
+      longest phrases first.
+    Returns *text* unchanged when it is empty or the memory holds nothing.
+    """
+    known_words = memory.get("words") or {}
+    known_phrases = memory.get("phrases") or {}
+    if not text or (not known_words and not known_phrases):
+        return text
+    # word-level corrections
+    def fix_word(w):
+        lw = w.lower()
+        if lw in known_words:
+            return w  # known exact
+        candidates = get_close_matches(lw, known_words.keys(), n=1, cutoff=min_ratio)
+        if not candidates:
+            return w
+        cand = candidates[0]
+        return cand.capitalize() if w[0].isupper() else cand
+    word_re = re.compile(r"^[A-Za-z0-9\-']+$")
+    # The capturing split keeps punctuation/whitespace tokens so the text can be rejoined.
+    corrected = ''.join(
+        fix_word(tok) if word_re.match(tok) else tok
+        for tok in re.split(r'(\W+)', text)
+    )
+    # phrase-level: longest phrases first so a long phrase wins over a shorter one it contains
+    for phrase in sorted(known_phrases, key=lambda s: -len(s)):
+        low_phrase = phrase.lower()
+        # only replace if phrase length >= 8 chars to avoid noisy matches
+        if len(low_phrase) < 8:
+            continue
+        if low_phrase in corrected.lower():
+            # Callable replacement: the stored phrase is inserted verbatim. Passing
+            # it as a string made re.sub treat backslashes in it as template escapes.
+            corrected = re.sub(re.escape(phrase), lambda _m, rep=phrase: rep,
+                               corrected, flags=re.IGNORECASE)
+    return corrected
+# ---------- File utilities ----------
+def save_as_word(text, filename=None):
+    """
+    Write *text* as a single paragraph into a .docx file and return its path.
+
+    When *filename* is None the file is "merged_transcripts.docx" inside the
+    system temp directory.
+    """
+    target = filename
+    if target is None:
+        target = os.path.join(tempfile.gettempdir(), "merged_transcripts.docx")
+    document = Document()
+    document.add_paragraph(text)
+    document.save(target)
+    return target
+# ---------- Advanced conversion: pydub auto + ffmpeg heuristics ----------
+def convert_to_wav_if_needed(input_path):
+    """
+    Advanced conversion:
+    - pydub (AudioSegment.from_file) first
+    - if that fails, exhaustive ffmpeg format/rate/channel grid
+    - writes diagnostics to a temp folder if conversion fails entirely
+
+    Returns the input path unchanged when it already ends in ".wav"
+    (case-insensitive); otherwise the path of a newly created temporary .wav
+    file, which this function does not delete.
+
+    Raises Exception when every attempt fails; the message names the
+    diagnostics file, or reports that the diagnostics could not be written.
+    """
+    input_path = str(input_path)
+    lower = input_path.lower()
+    if lower.endswith(".wav"):
+        return input_path
+    # try pydub first
+    auto_err = ""
+    tmp = None
+    try:
+        # Create the file only to reserve a unique name; pydub reopens it by path.
+        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        tmp.close()
+        AudioSegment.from_file(input_path).export(tmp.name, format="wav")
+        return tmp.name
+    except Exception as e:
+        # Keep the traceback for the diagnostics log and remove the unused temp file.
+        auto_err = traceback.format_exc()
+        try:
+            if tmp:
+                os.unlink(tmp.name)
+        except Exception:
+            pass
+    # fallback grid
+    # NOTE(review): several entries (e.g. 'pcm_s16le', 'adpcm_ms', 'vorbis') look like
+    # codec names rather than names accepted by ffmpeg's -f option — confirm with `ffmpeg -formats`.
+    pcm_formats = ['s16le', 's32le', 's24le', 's8', 'u8', 's16be', 'pcm_s16le', 'pcm_u8', 'pcm_u16le']
+    mulaw_alaw = ['mulaw', 'alaw']
+    adpcm = ['adpcm_ima_wav', 'adpcm_ms']
+    extra = ['gsm', 'g726', 'vorbis']
+    formats = pcm_formats + mulaw_alaw + adpcm + extra
+    sample_rates = [8000, 11025, 12000, 16000, 22050, 32000, 44100, 48000]
+    channels = [1, 2]
+    diagnostics = []
+    # The diagnostics directory is created up front, on the success path as well.
+    diag_dir = tempfile.mkdtemp(prefix="dct_diag_")
+    diag_log = os.path.join(diag_dir, "conversion_diagnostics.txt")
+    # Try every (format, sample rate, channel count) combination in order; the first
+    # attempt that exits 0 with a large enough output file wins.
+    # NOTE(review): presumably a raw-PCM guess can "succeed" on data that is not really
+    # in that format — verify the audio quality of the first accepted guess.
+    for fmt in formats:
+        for sr in sample_rates:
+            for ch in channels:
+                out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+                out_wav.close()
+                cmd = [
+                    "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
+                    "-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_wav.name
+                ]
+                try:
+                    # Each attempt is capped at 45 seconds.
+                    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=45)
+                except Exception as e_run:
+                    # ffmpeg missing, timeout, etc.: record it and move on to the next guess.
+                    diagnostics.append(f"RUN-EXC fmt={fmt} sr={sr} ch={ch} err={e_run}")
+                    try: os.unlink(out_wav.name)
+                    except Exception: pass
+                    continue
+                rc = proc.returncode
+                stderr = proc.stderr.strip() if proc.stderr else ""
+                stdout = proc.stdout.strip() if proc.stdout else ""
+                diagnostics.append(f"ATTEMPT fmt={fmt} sr={sr} ch={ch} rc={rc}")
+                if stdout:
+                    diagnostics.append("STDOUT:")
+                    diagnostics.append(stdout)
+                if stderr:
+                    diagnostics.append("STDERR:")
+                    diagnostics.append(stderr)
+                diagnostics.append("-" * 60)
+                try:
+                    if rc == 0 and os.path.exists(out_wav.name) and os.path.getsize(out_wav.name) > MIN_WAV_SIZE:
+                        # success
+                        # Writing the log is best effort; a failure here does not discard the result.
+                        try:
+                            with open(diag_log, "w", encoding="utf-8") as fh:
+                                fh.write("pydub auto error:\n")
+                                fh.write(auto_err + "\n\n")
+                                fh.write("Successful guess:\n")
+                                fh.write(f"fmt={fmt} sr={sr} ch={ch}\n\n")
+                                fh.write("Diagnostics (last attempts):\n")
+                                fh.write("\n".join(diagnostics[-1000:]))
+                        except Exception:
+                            pass
+                        return out_wav.name
+                except Exception:
+                    pass
+                # Rejected attempt: remove its output before trying the next combination.
+                try: os.unlink(out_wav.name)
+                except Exception: pass
+    # ffprobe and hexdump preview
+    # Reached only when no combination succeeded; gather extra context for the log.
+    try:
+        fp = subprocess.run(["ffprobe", "-v", "error", "-show_format", "-show_streams", input_path],
+                            capture_output=True, text=True, timeout=15)
+        diagnostics.append("FFPROBE:")
+        diagnostics.append(fp.stdout.strip() or fp.stderr.strip())
+    except Exception as e:
+        diagnostics.append(f"ffprobe failed: {e}")
+    try:
+        with open(input_path, "rb") as fh:
+            # The first 256 bytes, hex-encoded, help identify the container by eye.
+            head = fh.read(256)
+            diagnostics.append("HEX PREVIEW:")
+            diagnostics.append(head.hex())
+    except Exception as e:
+        diagnostics.append(f"could not read head: {e}")
+    try:
+        with open(diag_log, "w", encoding="utf-8") as fh:
+            fh.write("pydub auto error:\n")
+            fh.write(auto_err + "\n\n")
+            fh.write("Full diagnostics:\n\n")
+            fh.write("\n".join(diagnostics))
+    except Exception as e:
+        raise Exception(f"Conversion failed; diagnostics could not be written: {e}")
+    raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}\nSummary: {diagnostics[:6]}")
+# ---------- Whisper model cache ----------
+# Loaded Whisper models keyed by model name; filled lazily by get_whisper_model().
+MODEL_CACHE = {}
+def get_whisper_model(name):
+    """Return the Whisper model called *name*, loading it on first use and caching it."""
+    if name in MODEL_CACHE:
+        return MODEL_CACHE[name]
+    model = whisper.load_model(name)
+    MODEL_CACHE[name] = model
+    return model
+# ---------- Main transcription generator ----------
+def transcribe_multiple(audio_files, model_name, advanced_options, merge_checkbox, zip_file=None, zip_password=None, enable_memory=False):
+    """
+    Generator yields (log_text, transcripts_text, word_file_path_or_None, percent_int)
+    audio_files: path or list of paths (gr.File with type='filepath' gives file path string)
+    """
+    log = []
+    transcripts = []
+    word_file_path = None
+    temp_extract_dir = os.path.join(tempfile.gettempdir(), "extracted_audio")
+    extracted_audio_paths = []
+    # initial yield
+    yield "", "", None, 0
+    # cleanup
+    if os.path.exists(temp_extract_dir):
+        try:
+            shutil.rmtree(temp_extract_dir)
+            log.append(f"Cleaned previous temp dir: {temp_extract_dir}")
+        except Exception:
+            pass
+    # handle zip
+    if zip_file:
+        log.append(f"Processing zip: {zip_file}")
+        yield "\n\n".join(log), "\n\n".join(transcripts), None, 2
+        try:
+            os.makedirs(temp_extract_dir, exist_ok=True)
+            with pyzipper.ZipFile(zip_file, "r") as zf:
+                if zip_password:
+                    try: zf.setpassword(zip_password.encode())
+                    except Exception:
+                        log.append("Incorrect zip password")
+                        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+                        return
+                exts = ['.mp3', '.wav', '.aac', '.flac', '.ogg', '.dat', '.dct']
+                count = 0
+                for info in zf.infolist():
+                    if info.is_dir(): continue
+                    _, ext = os.path.splitext(info.filename)
+                    if ext.lower() in exts:
+                        try:
+                            zf.extract(info, path=temp_extract_dir)
+                            p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
+                            if os.path.exists(p):
+                                extracted_audio_paths.append(p)
+                                count += 1
+                                log.append(f"Extracted: {info.filename}")
+                        except Exception as e:
+                            log.append(f"Error extracting {info.filename}: {e}")
+                if count == 0:
+                    log.append("No supported audio in zip.")
+                    try: shutil.rmtree(temp_extract_dir)
+                    except Exception: pass
+                    yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+                    return
+        except pyzipper.BadZipFile:
+            log.append("Invalid zip file.")
+            try: shutil.rmtree(temp_extract_dir)
+            except Exception: pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+            return
+        except Exception as e:
+            log.append(f"Zip processing error: {e}")
+            try: shutil.rmtree(temp_extract_dir)
+            except Exception: pass
+            yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
+            return
     # collect audio file paths from either audio_files or extracted paths
     paths = []
         pass
 # ----------------------- Gradio UI -----------------------
+def run_transcription_wrapper(files, model_name, merge, zip_file, zip_password, enable_memory, advanced_options_state):
     """
+    Gradio event handler: stream transcription progress to the UI.
+
+    Normalizes the zip upload to a filesystem path (it may arrive as a path,
+    an object with a .name attribute, or a dict with a "name" key) and then
+    relays the updates produced by transcribe_multiple().
+
+    This is a generator function on purpose: presumably Gradio only streams
+    handlers that are themselves generator functions, so merely returning the
+    generator object from a plain function would not be iterated
+    (NOTE(review): confirm against the installed Gradio version).
+
+    Yields (log_text, transcripts_text, word_file_path_or_None). The percent
+    value produced by transcribe_multiple() is dropped because the click
+    handler wires exactly three output components.
+    advanced_options_state is accepted for the UI wiring but not used yet.
     """
     audio_input = files
     zip_path = None
     if zip_file:
         if isinstance(zip_file, (str, os.PathLike)):
             zip_path = str(zip_file)
         elif hasattr(zip_file, "name"):
             zip_path = zip_file.name
         elif isinstance(zip_file, dict) and zip_file.get("name"):
             zip_path = zip_file["name"]
+    adv = {}
+    # advanced options state could be used to pass params later
+    updates = transcribe_multiple(audio_input, model_name, adv, merge_checkbox=merge, zip_file=zip_path, zip_password=zip_password, enable_memory=enable_memory)
+    for log_text, transcripts_text, word_file, _percent in updates:
+        yield log_text, transcripts_text, word_file
 # Build Blocks UI
+demo = gr.Blocks()
 with demo:
     gr.Markdown("## Whisper Transcription (Spaces-ready)")
             download_file = gr.File(label="Merged .docx (when enabled)")
     # connect
     submit.click(fn=run_transcription_wrapper,
                  inputs=[file_input, model_select, merge_checkbox, zip_input, zip_password, memory_checkbox, gr.State({})],
                  outputs=[logs, transcripts_out, download_file])