Spaces:

deepakkaura
/

medasr-server

Sleeping

App Files Community

deepakkaura committed on 16 days ago

Commit

98cb0ee

verified ·

1 Parent(s): 18192a6

Truly bypass LM when KENLM_ALPHA<=0

Browse files

Files changed (1) hide show

server.py +33 -16

server.py CHANGED Viewed

@@ -181,17 +181,33 @@ def _ensure_kenlm():
       2. /health can surface a clear "downloading" vs "ready" status.
       3. The LM file can be hot-swapped on the HF repo without rebuilding."""
     kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
-    if os.path.exists(kenlm_path):
-        size_mb = os.path.getsize(kenlm_path) / 1048576
-        logger.info("KenLM already on disk at %s (%.1f MB), skipping download.",
-                    kenlm_path, size_mb)
-        return
     url = os.environ.get(
         "KENLM_URL",
         "https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
     )
-    logger.info("Downloading KenLM from %s ...", url)
     import urllib.request
     t0 = time.monotonic()
     tmp = kenlm_path + ".part"
     try:
@@ -234,22 +250,23 @@ def _build_decoder():
         labels[blank_id] = ""
     logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
-    # Optional KenLM shallow fusion — set KENLM_PATH or drop the binary in
-    # /app/radiology.bin. Falls back silently to non-LM decoding if missing.
     kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
-    if os.path.exists(kenlm_path):
-        # alpha (LM weight): starting at 0.2 is conservative — empirically
-        # 0.5 was too aggressive on this corpus and ate first-characters of
-        # words at segment boundaries. Tune higher only after confirming
-        # word-boundary stability.
-        alpha = float(os.environ.get("KENLM_ALPHA", "0.2"))
-        beta = float(os.environ.get("KENLM_BETA", "1.0"))
         size_mb = os.path.getsize(kenlm_path) / 1048576
         logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
                     size_mb, kenlm_path, alpha, beta)
         return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
                                 alpha=alpha, beta=beta)
-    logger.info("No KenLM at %s — using non-LM beam-search decoder.", kenlm_path)
     return build_ctcdecoder(labels)

       2. /health can surface a clear "downloading" vs "ready" status.
       3. The LM file can be hot-swapped on the HF repo without rebuilding."""
     kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
     url = os.environ.get(
         "KENLM_URL",
         "https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
     )
+    # Check what's on disk vs what's on the remote — if size mismatches,
+    # the on-disk copy is stale (we shipped a new LM version) and needs
+    # to be re-downloaded. Without this, persistent Space storage caches
+    # the old LM forever and silently ignores LM updates.
     import urllib.request
+    remote_size = None
+    try:
+        with urllib.request.urlopen(url, timeout=15) as resp:
+            cl = resp.headers.get("Content-Length")
+            if cl:
+                remote_size = int(cl)
+    except Exception as e:
+        logger.warning("Could not HEAD remote KenLM (%s); will trust local file.", e)
+    if os.path.exists(kenlm_path):
+        local_size = os.path.getsize(kenlm_path)
+        if remote_size is None or local_size == remote_size:
+            logger.info("KenLM already on disk at %s (%.1f MB), matches remote — skip download.",
+                        kenlm_path, local_size / 1048576)
+            return
+        logger.info("KenLM on disk is stale (local=%.1f MB, remote=%.1f MB) — re-downloading.",
+                    local_size / 1048576, remote_size / 1048576)
+        os.remove(kenlm_path)
+    logger.info("Downloading KenLM from %s ...", url)
     t0 = time.monotonic()
     tmp = kenlm_path + ".part"
     try:
         labels[blank_id] = ""
     logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
+    # Optional KenLM shallow fusion. Setting KENLM_ALPHA=0 (or any value <= 0)
+    # COMPLETELY bypasses the LM — pyctcdecode's alpha=0 still applies LM-
+    # related side effects on beam allocation/vocab, so to truly disable we
+    # call build_ctcdecoder without kenlm_model_path at all.
     kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
+    alpha = float(os.environ.get("KENLM_ALPHA", "0.05"))
+    beta = float(os.environ.get("KENLM_BETA", "1.0"))
+    if alpha > 0 and os.path.exists(kenlm_path):
         size_mb = os.path.getsize(kenlm_path) / 1048576
         logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
                     size_mb, kenlm_path, alpha, beta)
         return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
                                 alpha=alpha, beta=beta)
+    if not os.path.exists(kenlm_path):
+        logger.info("No KenLM at %s — using non-LM beam-search decoder.", kenlm_path)
+    else:
+        logger.info("KENLM_ALPHA<=0 — bypassing LM (non-LM beam-search decoder).")
     return build_ctcdecoder(labels)