Spaces:

staraks
/

arvind

Sleeping

App Files Files Community

staraks committed on Nov 20, 2025

Commit

97cd142

verified ·

1 Parent(s): 4987752

Update app.py

Browse files

Files changed (1) hide show

app.py +203 -51

app.py CHANGED Viewed

@@ -35,9 +35,7 @@ print("DEBUG: imports OK", flush=True)
 # ---------- Config ----------
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
-MIN_WAV_SIZE = 200  # bytes
-# Small ffmpeg fallback grid (hybrid conversion)
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
@@ -52,7 +50,13 @@ def load_memory():
     try:
         if os.path.exists(MEMORY_FILE):
             with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
-                return json.load(fh)
     except Exception:
         pass
     mem = {"words": {}, "phrases": {}}
@@ -66,8 +70,11 @@ def load_memory():
 def save_memory(mem):
     with MEMORY_LOCK:
-        with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
-            json.dump(mem, fh, ensure_ascii=False, indent=2)
 memory = load_memory()
@@ -77,6 +84,7 @@ print(
     flush=True,
 )
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
@@ -235,6 +243,129 @@ def memory_correct_text(text, min_ratio=0.85):
     return corrected
 # ---------- File utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
@@ -247,39 +378,33 @@ def save_as_word(text, filename=None):
     return filename
-# ---------- Hybrid conversion: pydub + small ffmpeg fallback ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
-    cmd = [
-        "ffmpeg",
-        "-hide_banner",
-        "-loglevel",
-        "error",
-        "-y",
-        "-f",
-        fmt,
-        "-ar",
-        str(sr),
-        "-ac",
-        str(ch),
-        "-i",
-        input_path,
-        out_path,
-    ]
     try:
-        proc = subprocess.run(cmd, capture_output=True, timeout=30, text=True)
-        if (
-            proc.returncode == 0
-            and os.path.exists(out_path)
-            and os.path.getsize(out_path) > MIN_WAV_SIZE
-        ):
-            return True, proc.stderr + proc.stdout
         else:
             try:
                 if os.path.exists(out_path):
                     os.unlink(out_path)
             except Exception:
                 pass
-            return False, proc.stderr + proc.stdout
     except Exception as e:
         try:
             if os.path.exists(out_path):
@@ -324,9 +449,7 @@ def convert_to_wav_if_needed(input_path):
         out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         out_wav.close()
         success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
-        diagnostics.append(
-            f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n"
-        )
         if success:
             try:
                 with open(diag_log, "w", encoding="utf-8") as fh:
@@ -373,9 +496,7 @@ def convert_to_wav_if_needed(input_path):
     except Exception as e:
         raise Exception(f"Conversion failed; diagnostics write error: {e}")
-    raise Exception(
-        f"Could not convert file to WAV. Diagnostics saved to: {diag_log}"
-    )
 # ---------- Whisper model cache ----------
@@ -385,6 +506,7 @@ MODEL_CACHE = {}
 def get_whisper_model(name):
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
         MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
@@ -430,9 +552,8 @@ def transcribe_multiple(
                     try:
                         zf.setpassword(zip_password.encode())
                     except Exception:
-                        log.append("Incorrect zip password")
-                        yield "\n\n".join(log), "\n\n".join(transcripts), None, 100
-                        return
                 exts = [
                     ".mp3",
                     ".wav",
@@ -451,12 +572,16 @@ def transcribe_multiple(
                     if ext.lower() in exts:
                         try:
                             zf.extract(info, path=temp_extract_dir)
                         except Exception as e:
                             log.append(f"Error extracting {info.filename}: {e}")
                             continue
-                        p = os.path.normpath(
-                            os.path.join(temp_extract_dir, info.filename)
-                        )
                         if os.path.exists(p):
                             extracted_audio_paths.append(p)
                             count += 1
@@ -575,12 +700,16 @@ def transcribe_multiple(
             try:
                 if wav and os.path.exists(wav):
                     tmpdir = tempfile.gettempdir()
-                    if (
-                        os.path.commonpath([tmpdir, os.path.abspath(wav)])
-                        == tmpdir
-                        and not p.lower().endswith(".wav")
-                    ):
-                        os.unlink(wav)
             except Exception:
                 pass
@@ -699,7 +828,7 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
             default_zip_password = gr.Textbox(
                 label="Default ZIP password",
-                value="dietcoke1",  # you can change this
                 interactive=True,
             )
@@ -720,12 +849,22 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
             )
             memory_checkbox = gr.Checkbox(
-                label="Enable correction memory",
                 value=False,
             )
             submit = gr.Button("Transcribe", variant="primary")
         # RIGHT: Outputs (Transcript → Progress → Download → Logs)
         with gr.Column(scale=1):
             gr.Markdown("### Output")
@@ -755,6 +894,7 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
                 interactive=False,
             )
     submit.click(
         fn=run_transcription_wrapper,
         inputs=[
@@ -772,6 +912,18 @@ with gr.Blocks(title="Whisper Transcriber") as demo:
         outputs=[logs, transcripts_out, download_file, progress_num],
     )
 # ---------- Launch ----------
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))

 # ---------- Config ----------
 MEMORY_FILE = "memory.json"
 MEMORY_LOCK = threading.Lock()
+MIN_WAV_SIZE = 1024  # raised slightly from 200 for safety
 FFMPEG_CANDIDATES = [
     ("s16le", 16000, 1),
     ("s16le", 44100, 2),
     try:
         if os.path.exists(MEMORY_FILE):
             with open(MEMORY_FILE, "r", encoding="utf-8") as fh:
+                data = json.load(fh)
+                # validate minimal structure
+                if not isinstance(data, dict):
+                    raise ValueError("memory.json root not dict")
+                data.setdefault("words", {})
+                data.setdefault("phrases", {})
+                return data
     except Exception:
         pass
     mem = {"words": {}, "phrases": {}}
 def save_memory(mem):
+    """
+    Write `mem` to MEMORY_FILE as indented UTF-8 JSON (non-ASCII kept as-is).
+
+    MEMORY_LOCK is acquired here for the duration of the write. Because
+    MEMORY_LOCK is a plain threading.Lock (not re-entrant), a caller that
+    already holds it would block forever on this call.
+
+    Write errors are printed with traceback.print_exc() and not re-raised,
+    so this function returns None whether or not the file was written.
+    """
     with MEMORY_LOCK:
+        try:
+            with open(MEMORY_FILE, "w", encoding="utf-8") as fh:
+                json.dump(mem, fh, ensure_ascii=False, indent=2)
+        except Exception:
+            traceback.print_exc()
 memory = load_memory()
     flush=True,
 )
 # ---------- Postprocessing ----------
 MEDICAL_ABBREVIATIONS = {
     "pt": "patient",
     return corrected
+# ---------- Memory management UI helpers ----------
+def import_memory_file(uploaded):
+    """
+    Merge an uploaded memory file into the in-process correction memory.
+
+    `uploaded` may be a filepath (str / os.PathLike), an object with a
+    `.name` attribute, or a dict with a "name" key. Accepted file contents:
+     - JSON of shape {"words": {...}, "phrases": {...}}
+     - Plain newline-separated words or CSV-like lines "word,count"
+
+    Counts are added to any existing counts; word keys are lower-cased.
+    Returns a status message. Failures are reported in the message (and
+    printed via traceback) rather than raised.
+    """
+    if not uploaded:
+        return "No file provided."
+    try:
+        if isinstance(uploaded, (str, os.PathLike)):
+            path = str(uploaded)
+        elif hasattr(uploaded, "name"):
+            path = uploaded.name
+        elif isinstance(uploaded, dict) and uploaded.get("name"):
+            path = uploaded["name"]
+        else:
+            return "Unable to determine uploaded file path."
+        # utf-8-sig also reads BOM-less UTF-8; it strips a leading BOM that
+        # would otherwise make json.loads reject a valid JSON file.
+        with open(path, "r", encoding="utf-8-sig") as fh:
+            raw = fh.read()
+        # Guard only the parse. Errors in the merge below must not be mistaken
+        # for "not JSON", or the JSON text would be re-imported as word lines.
+        try:
+            parsed = json.loads(raw)
+        except ValueError:
+            parsed = None
+        if isinstance(parsed, dict):
+            parsed_words = parsed.get("words") or {}
+            parsed_phrases = parsed.get("phrases") or {}
+            if not isinstance(parsed_words, dict) or not isinstance(parsed_phrases, dict):
+                return 'Import failed: "words" and "phrases" must be JSON objects.'
+            # Convert every count before touching memory so one bad value
+            # cannot leave a half-applied merge behind.
+            word_counts = [(k.lower(), int(v)) for k, v in parsed_words.items()]
+            phrase_counts = [(k, int(v)) for k, v in parsed_phrases.items()]
+            with MEMORY_LOCK:
+                for key, cnt in word_counts:
+                    memory["words"][key] = memory["words"].get(key, 0) + cnt
+                for key, cnt in phrase_counts:
+                    memory["phrases"][key] = memory["phrases"].get(key, 0) + cnt
+            # save_memory() acquires MEMORY_LOCK itself and the lock is not
+            # re-entrant, so it must be called only after the lock is released.
+            save_memory(memory)
+            return f"Imported JSON memory (words={len(parsed_words)}, phrases={len(parsed_phrases)})."
+        # fallback: split lines, attempt "word,count" or just "word"
+        lines = [ln.strip() for ln in raw.splitlines() if ln.strip()]
+        added_words = 0
+        added_phrases = 0
+        with MEMORY_LOCK:
+            for line in lines:
+                if "," in line:
+                    parts = [p.strip() for p in line.split(",", 1)]
+                    key = parts[0].lower()
+                    try:
+                        cnt = int(parts[1])
+                    except ValueError:
+                        # no usable count after the comma: count the word once
+                        cnt = 1
+                    memory["words"][key] = memory["words"].get(key, 0) + cnt
+                    added_words += 1
+                elif len(line.split()) <= 3:
+                    # short entries (<= 3 tokens) are stored as words
+                    key = line.lower()
+                    memory["words"][key] = memory["words"].get(key, 0) + 1
+                    added_words += 1
+                else:
+                    memory["phrases"][line] = memory["phrases"].get(line, 0) + 1
+                    added_phrases += 1
+        # Persist after releasing MEMORY_LOCK (save_memory re-acquires it).
+        save_memory(memory)
+        return f"Imported {added_words} words and {added_phrases} phrases from file."
+    except Exception as e:
+        traceback.print_exc()
+        return f"Import failed: {e}"
+def add_memory_entry(entry):
+    """
+    Add a single word or phrase to the correction memory and persist it.
+
+    Entries of at most 3 whitespace-separated tokens are stored lower-cased
+    under "words"; longer entries are stored verbatim under "phrases". The
+    entry's count is incremented by one. Returns a status message.
+    """
+    if not entry or not entry.strip():
+        return "No entry provided."
+    e = entry.strip()
+    with MEMORY_LOCK:
+        if len(e.split()) <= 3:
+            key = e.lower()
+            memory["words"][key] = memory["words"].get(key, 0) + 1
+            status = f"Added/updated word: '{key}'."
+        else:
+            memory["phrases"][e] = memory["phrases"].get(e, 0) + 1
+            status = f"Added/updated phrase: '{e}'."
+    # save_memory() acquires MEMORY_LOCK itself and the lock is a plain,
+    # non-re-entrant threading.Lock, so calling it while still holding the
+    # lock would block forever. Persist only after the lock is released.
+    save_memory(memory)
+    return status
+def clear_memory():
+    """
+    Reset the correction memory to empty "words"/"phrases" tables and
+    persist the empty state. Returns a status message.
+    """
+    global memory
+    with MEMORY_LOCK:
+        memory = {"words": {}, "phrases": {}}
+    # save_memory() acquires MEMORY_LOCK itself; the lock is not re-entrant,
+    # so it has to be called after the lock above has been released.
+    save_memory(memory)
+    return "Memory cleared."
+def view_memory(limit=2000):
+    """
+    Build a plain-text summary of the correction memory for the UI.
+
+    Lists up to 50 words and then up to 50 phrases, each section ordered by
+    descending count. If the text is longer than `limit` characters it is cut
+    at `limit` and a truncation marker is appended.
+    """
+    sections = (
+        ("WORDS (top 50):", memory.get("words", {})),
+        ("PHRASES (top 50):", memory.get("phrases", {})),
+    )
+    rendered = []
+    for heading, counts in sections:
+        ranked = sorted(counts.items(), key=lambda item: -item[1])
+        rendered.append("\n".join([heading] + [f"{name}: {count}" for name, count in ranked[:50]]))
+    # a blank line separates the words section from the phrases section
+    summary = "\n\n".join(rendered)
+    if len(summary) <= limit:
+        return summary
+    return summary[:limit] + "\n...truncated..."
 # ---------- File utilities ----------
 def save_as_word(text, filename=None):
     if filename is None:
     return filename
+# ---------- improved ffmpeg convert ----------
 def _ffmpeg_convert(input_path, out_path, fmt, sr, ch):
+    """
+    Use ffmpeg to convert input_path -> out_path.
+    Let ffmpeg autodetect input unless fmt signals raw PCM.
+    """
     try:
+        cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
+        if fmt in ("s16le", "pcm_s16le", "mulaw"):
+            # raw input: specify input format and sample params before -i
+            cmd += ["-f", fmt, "-ar", str(sr), "-ac", str(ch), "-i", input_path, out_path]
+        else:
+            # autodetect input, request output sample rate/channels
+            cmd += ["-i", input_path, "-ar", str(sr), "-ac", str(ch), out_path]
+        proc = subprocess.run(cmd, capture_output=True, timeout=60, text=True)
+        stdout_stderr = (proc.stdout or "") + (proc.stderr or "")
+        if proc.returncode == 0 and os.path.exists(out_path) and os.path.getsize(out_path) > MIN_WAV_SIZE:
+            return True, stdout_stderr
         else:
             try:
                 if os.path.exists(out_path):
                     os.unlink(out_path)
             except Exception:
                 pass
+            return False, stdout_stderr
     except Exception as e:
         try:
             if os.path.exists(out_path):
         out_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
         out_wav.close()
         success, debug = _ffmpeg_convert(input_path, out_wav.name, fmt, sr, ch)
+        diagnostics.append(f"TRY fmt={fmt} sr={sr} ch={ch} success={success}\n{debug}\n")
         if success:
             try:
                 with open(diag_log, "w", encoding="utf-8") as fh:
     except Exception as e:
         raise Exception(f"Conversion failed; diagnostics write error: {e}")
+    raise Exception(f"Could not convert file to WAV. Diagnostics saved to: {diag_log}")
 # ---------- Whisper model cache ----------
 def get_whisper_model(name):
+    """
+    Return the Whisper model registered under `name`, loading it on first use.
+
+    Loaded models are kept in MODEL_CACHE keyed by `name`, so
+    whisper.load_model runs at most once per distinct name that is requested.
+    """
     if name not in MODEL_CACHE:
         print(f"DEBUG: loading whisper model '{name}'", flush=True)
+        # You can set device by changing whisper.load_model(name, device="cpu") if needed.
         MODEL_CACHE[name] = whisper.load_model(name)
     return MODEL_CACHE[name]
                     try:
                         zf.setpassword(zip_password.encode())
                     except Exception:
+                        log.append("Failed to set zip password (unexpected).")
                 exts = [
                     ".mp3",
                     ".wav",
                     if ext.lower() in exts:
                         try:
                             zf.extract(info, path=temp_extract_dir)
+                        except RuntimeError as e:
+                            log.append(f"Password required or incorrect for {info.filename}: {e}")
+                            continue
+                        except pyzipper.BadZipFile:
+                            log.append(f"Bad zip entry: {info.filename}")
+                            continue
                         except Exception as e:
                             log.append(f"Error extracting {info.filename}: {e}")
                             continue
+                        p = os.path.normpath(os.path.join(temp_extract_dir, info.filename))
                         if os.path.exists(p):
                             extracted_audio_paths.append(p)
                             count += 1
             try:
                 if wav and os.path.exists(wav):
                     tmpdir = tempfile.gettempdir()
+                    try:
+                        common = os.path.commonpath([os.path.abspath(tmpdir), os.path.abspath(wav)])
+                        if common == os.path.abspath(tmpdir) and not p.lower().endswith(".wav"):
+                            os.unlink(wav)
+                    except Exception:
+                        try:
+                            if tmpdir in os.path.abspath(wav) and not p.lower().endswith(".wav"):
+                                os.unlink(wav)
+                        except Exception:
+                            pass
             except Exception:
                 pass
             default_zip_password = gr.Textbox(
                 label="Default ZIP password",
+                value="dietcoke1",
                 interactive=True,
             )
             )
             memory_checkbox = gr.Checkbox(
+                label="Enable correction memory (use during transcription)",
                 value=False,
             )
             submit = gr.Button("Transcribe", variant="primary")
+            # Memory management UI
+            gr.Markdown("### Memory management")
+            mem_upload = gr.File(label="Import memory file (JSON or text)", file_count="single", type="file")
+            mem_import_btn = gr.Button("Import Memory File")
+            mem_manual_entry = gr.Textbox(label="Add word/phrase to memory (manual)", placeholder="Type a word or phrase")
+            mem_add_btn = gr.Button("Add to Memory")
+            mem_clear_btn = gr.Button("Clear Memory")
+            mem_view_btn = gr.Button("View Memory")
+            mem_status = gr.Textbox(label="Memory status", interactive=False, lines=4)
         # RIGHT: Outputs (Transcript → Progress → Download → Logs)
         with gr.Column(scale=1):
             gr.Markdown("### Output")
                 interactive=False,
             )
+    # Transcription click binding
     submit.click(
         fn=run_transcription_wrapper,
         inputs=[
         outputs=[logs, transcripts_out, download_file, progress_num],
     )
+    # Memory button bindings
+    def _import_memory(uploaded):
+        return import_memory_file(uploaded)
+    mem_import_btn.click(fn=_import_memory, inputs=[mem_upload], outputs=[mem_status])
+    mem_add_btn.click(fn=add_memory_entry, inputs=[mem_manual_entry], outputs=[mem_status])
+    mem_clear_btn.click(fn=lambda: clear_memory(), inputs=[], outputs=[mem_status])
+    mem_view_btn.click(fn=lambda: view_memory(), inputs=[], outputs=[mem_status])
 # ---------- Launch ----------
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))