deepakkaura committed on
Commit
98cb0ee
·
verified ·
1 Parent(s): 18192a6

Truly bypass LM when KENLM_ALPHA<=0

Browse files
Files changed (1) hide show
  1. server.py +33 -16
server.py CHANGED
@@ -181,17 +181,33 @@ def _ensure_kenlm():
181
  2. /health can surface a clear "downloading" vs "ready" status.
182
  3. The LM file can be hot-swapped on the HF repo without rebuilding."""
183
  kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
184
- if os.path.exists(kenlm_path):
185
- size_mb = os.path.getsize(kenlm_path) / 1048576
186
- logger.info("KenLM already on disk at %s (%.1f MB), skipping download.",
187
- kenlm_path, size_mb)
188
- return
189
  url = os.environ.get(
190
  "KENLM_URL",
191
  "https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
192
  )
193
- logger.info("Downloading KenLM from %s ...", url)
 
 
 
194
  import urllib.request
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  t0 = time.monotonic()
196
  tmp = kenlm_path + ".part"
197
  try:
@@ -234,22 +250,23 @@ def _build_decoder():
234
  labels[blank_id] = ""
235
  logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
236
 
237
- # Optional KenLM shallow fusion — set KENLM_PATH or drop the binary in
238
- # /app/radiology.bin. Falls back silently to non-LM decoding if missing.
 
 
239
  kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
240
- if os.path.exists(kenlm_path):
241
- # alpha (LM weight): starting at 0.2 is conservative — empirically
242
- # 0.5 was too aggressive on this corpus and ate first-characters of
243
- # words at segment boundaries. Tune higher only after confirming
244
- # word-boundary stability.
245
- alpha = float(os.environ.get("KENLM_ALPHA", "0.2"))
246
- beta = float(os.environ.get("KENLM_BETA", "1.0"))
247
  size_mb = os.path.getsize(kenlm_path) / 1048576
248
  logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
249
  size_mb, kenlm_path, alpha, beta)
250
  return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
251
  alpha=alpha, beta=beta)
252
- logger.info("No KenLM at %s β€” using non-LM beam-search decoder.", kenlm_path)
 
 
 
253
  return build_ctcdecoder(labels)
254
 
255
 
 
181
  2. /health can surface a clear "downloading" vs "ready" status.
182
  3. The LM file can be hot-swapped on the HF repo without rebuilding."""
183
  kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
 
 
 
 
 
184
  url = os.environ.get(
185
  "KENLM_URL",
186
  "https://huggingface.co/chirag18/radiology-stt-assets/resolve/main/radiology.bin",
187
  )
188
+ # Check what's on disk vs what's on the remote — if size mismatches,
189
+ # the on-disk copy is stale (we shipped a new LM version) and needs
190
+ # to be re-downloaded. Without this, persistent Space storage caches
191
+ # the old LM forever and silently ignores LM updates.
192
  import urllib.request
193
+ remote_size = None
194
+ try:
195
+ with urllib.request.urlopen(url, timeout=15) as resp:
196
+ cl = resp.headers.get("Content-Length")
197
+ if cl:
198
+ remote_size = int(cl)
199
+ except Exception as e:
200
+ logger.warning("Could not HEAD remote KenLM (%s); will trust local file.", e)
201
+ if os.path.exists(kenlm_path):
202
+ local_size = os.path.getsize(kenlm_path)
203
+ if remote_size is None or local_size == remote_size:
204
+ logger.info("KenLM already on disk at %s (%.1f MB), matches remote β€” skip download.",
205
+ kenlm_path, local_size / 1048576)
206
+ return
207
+ logger.info("KenLM on disk is stale (local=%.1f MB, remote=%.1f MB) β€” re-downloading.",
208
+ local_size / 1048576, remote_size / 1048576)
209
+ os.remove(kenlm_path)
210
+ logger.info("Downloading KenLM from %s ...", url)
211
  t0 = time.monotonic()
212
  tmp = kenlm_path + ".part"
213
  try:
 
250
  labels[blank_id] = ""
251
  logger.info("Decoder labels: %d, blank at id=%s, sample=%s", len(labels), blank_id, labels[:6])
252
 
253
+ # Optional KenLM shallow fusion. Setting KENLM_ALPHA=0 (or any value <= 0)
254
+ # COMPLETELY bypasses the LM — pyctcdecode's alpha=0 still applies LM-
255
+ # related side effects on beam allocation/vocab, so to truly disable we
256
+ # call build_ctcdecoder without kenlm_model_path at all.
257
  kenlm_path = os.environ.get("KENLM_PATH", "/app/radiology.bin")
258
+ alpha = float(os.environ.get("KENLM_ALPHA", "0.05"))
259
+ beta = float(os.environ.get("KENLM_BETA", "1.0"))
260
+ if alpha > 0 and os.path.exists(kenlm_path):
 
 
 
 
261
  size_mb = os.path.getsize(kenlm_path) / 1048576
262
  logger.info("Loading KenLM (%.0f MB) from %s, alpha=%.2f, beta=%.2f",
263
  size_mb, kenlm_path, alpha, beta)
264
  return build_ctcdecoder(labels, kenlm_model_path=kenlm_path,
265
  alpha=alpha, beta=beta)
266
+ if not os.path.exists(kenlm_path):
267
+ logger.info("No KenLM at %s β€” using non-LM beam-search decoder.", kenlm_path)
268
+ else:
269
+ logger.info("KENLM_ALPHA<=0 β€” bypassing LM (non-LM beam-search decoder).")
270
  return build_ctcdecoder(labels)
271
 
272