Spaces:
Sleeping
Sleeping
Truly bypass LM when KENLM_ALPHA<=0
Browse files
server.py
CHANGED
|
@@ -181,17 +181,33 @@ def _ensure_kenlm():
|
|
| 181 |
2. /health can surface a clear "downloading" vs "ready" status.
|
| 182 |
3. The LM file can be hot-swapped on the HF repo without rebuilding."""
|
| 183 |
kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
|
| 184 |
-
if os.path.exists(kenlm_path):
|
| 185 |
-
size_mb = os.path.getsize(kenlm_path) / 1048576
|
| 186 |
-
logger.info("KenLM already on disk at %s (%.1f MB), skipping download.",
|
| 187 |
-
kenlm_path, size_mb)
|
| 188 |
-
return
|
| 189 |
url = os.environ.get(
|
| 190 |
"KENLM_URL",
|
| 191 |
"https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
|
| 192 |
)
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
| 194 |
import urllib.request
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
t0 = time.monotonic()
|
| 196 |
tmp = kenlm_path + ".part"
|
| 197 |
try:
|
|
@@ -234,22 +250,23 @@ def _build_decoder():
|
|
| 234 |
labels[blank_id] = ""
|
| 235 |
logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
|
| 236 |
|
| 237 |
-
# Optional KenLM shallow fusion
|
| 238 |
-
#
|
|
|
|
|
|
|
| 239 |
kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
# words at segment boundaries. Tune higher only after confirming
|
| 244 |
-
# word-boundary stability.
|
| 245 |
-
alpha = float(os.environ.get("KENLM_ALPHA", "0.2"))
|
| 246 |
-
beta = float(os.environ.get("KENLM_BETA", "1.0"))
|
| 247 |
size_mb = os.path.getsize(kenlm_path) / 1048576
|
| 248 |
logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
|
| 249 |
size_mb, kenlm_path, alpha, beta)
|
| 250 |
return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
|
| 251 |
alpha=alpha, beta=beta)
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
| 253 |
return build_ctcdecoder(labels)
|
| 254 |
|
| 255 |
|
|
|
|
| 181 |
2. /health can surface a clear "downloading" vs "ready" status.
|
| 182 |
3. The LM file can be hot-swapped on the HF repo without rebuilding."""
|
| 183 |
kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
url = os.environ.get(
|
| 185 |
"KENLM_URL",
|
| 186 |
"https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
|
| 187 |
)
|
| 188 |
+
# Check what's on disk vs what's on the remote — if size mismatches,
|
| 189 |
+
# the on-disk copy is stale (we shipped a new LM version) and needs
|
| 190 |
+
# to be re-downloaded. Without this, persistent Space storage caches
|
| 191 |
+
# the old LM forever and silently ignores LM updates.
|
| 192 |
import urllib.request
|
| 193 |
+
remote_size = None
|
| 194 |
+
try:
|
| 195 |
+
with urllib.request.urlopen(url, timeout=15) as resp:
|
| 196 |
+
cl = resp.headers.get("Content-Length")
|
| 197 |
+
if cl:
|
| 198 |
+
remote_size = int(cl)
|
| 199 |
+
except Exception as e:
|
| 200 |
+
logger.warning("Could not HEAD remote KenLM (%s); will trust local file.", e)
|
| 201 |
+
if os.path.exists(kenlm_path):
|
| 202 |
+
local_size = os.path.getsize(kenlm_path)
|
| 203 |
+
if remote_size is None or local_size == remote_size:
|
| 204 |
+
logger.info("KenLM already on disk at %s (%.1f MB), matches remote β skip download.",
|
| 205 |
+
kenlm_path, local_size / 1048576)
|
| 206 |
+
return
|
| 207 |
+
logger.info("KenLM on disk is stale (local=%.1f MB, remote=%.1f MB) β re-downloading.",
|
| 208 |
+
local_size / 1048576, remote_size / 1048576)
|
| 209 |
+
os.remove(kenlm_path)
|
| 210 |
+
logger.info("Downloading KenLM from %s ...", url)
|
| 211 |
t0 = time.monotonic()
|
| 212 |
tmp = kenlm_path + ".part"
|
| 213 |
try:
|
|
|
|
| 250 |
labels[blank_id] = ""
|
| 251 |
logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
|
| 252 |
|
| 253 |
+
# Optional KenLM shallow fusion. Setting KENLM_ALPHA=0 (or any value <= 0)
|
| 254 |
+
# COMPLETELY bypasses the LM — pyctcdecode's alpha=0 still applies LM-
|
| 255 |
+
# related side effects on beam allocation/vocab, so to truly disable we
|
| 256 |
+
# call build_ctcdecoder without kenlm_model_path at all.
|
| 257 |
kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
|
| 258 |
+
alpha = float(os.environ.get("KENLM_ALPHA", "0.05"))
|
| 259 |
+
beta = float(os.environ.get("KENLM_BETA", "1.0"))
|
| 260 |
+
if alpha > 0 and os.path.exists(kenlm_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
size_mb = os.path.getsize(kenlm_path) / 1048576
|
| 262 |
logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
|
| 263 |
size_mb, kenlm_path, alpha, beta)
|
| 264 |
return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
|
| 265 |
alpha=alpha, beta=beta)
|
| 266 |
+
if not os.path.exists(kenlm_path):
|
| 267 |
+
logger.info("No KenLM at %s β using non-LM beam-search decoder.", kenlm_path)
|
| 268 |
+
else:
|
| 269 |
+
logger.info("KENLM_ALPHA<=0 β bypassing LM (non-LM beam-search decoder).")
|
| 270 |
return build_ctcdecoder(labels)
|
| 271 |
|
| 272 |
|