Um34ER committed on
Commit
5fa3309
·
verified ·
1 Parent(s): 356351d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -2
app.py CHANGED
@@ -569,7 +569,8 @@ PRIMARY_MODEL_ID = os.getenv("PRIMARY_MODEL_ID", "oddadmix/Qaari-0.1-Urdu-OCR-VL
569
  FALLBACK_MODEL_ID = os.getenv("FALLBACK_MODEL_ID", "stepfun-ai/GOT-OCR-2.0-hf")
570
  ENABLE_FALLBACK = os.getenv("ENABLE_FALLBACK", "1").strip() not in ("0", "false", "no")
571
  VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))
572
- VLM_MAX_TOKENS = int(os.getenv("VLM_MAX_NEW_TOKENS", "512"))
 
573
  VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT_SECONDS", "75"))
574
 
575
 
@@ -875,7 +876,7 @@ logger = logging.getLogger("parchi.app")
875
 
876
  # ── Constants ─────────────────────────────────────────────────────────────────
877
  MAX_IMAGE_SIZE_MB = 10
878
- CONCURRENCY_LIMIT = 2 # max simultaneous VLM inferences
879
  CACHE_SIZE = 50 # LRU cache entries
880
  CACHE_TTL = 3600 # 1 hour
881
 
@@ -885,6 +886,23 @@ semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)
885
  result_cache: Dict[str, dict] = {} # hash → {result, timestamp}
886
 
887
  # ── FastAPI App ───────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
888
  app = FastAPI(
889
  title="Smart Parchi OCR v7",
890
  description=(
@@ -892,6 +910,7 @@ app = FastAPI(
892
  "Qaari-0.1 (Urdu Nastaliq) + GOT-OCR 2.0 fallback. No external APIs."
893
  ),
894
  version="7.0.0",
 
895
  )
896
 
897
  app.add_middleware(
 
569
  FALLBACK_MODEL_ID = os.getenv("FALLBACK_MODEL_ID", "stepfun-ai/GOT-OCR-2.0-hf")
570
  ENABLE_FALLBACK = os.getenv("ENABLE_FALLBACK", "1").strip() not in ("0", "false", "no")
571
  VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))
572
+ # 200 tokens is plenty for a grocery receipt (Qaari output was 68 chars)
573
+ VLM_MAX_TOKENS = int(os.getenv("VLM_MAX_NEW_TOKENS", "200"))
574
  VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT_SECONDS", "75"))
575
 
576
 
 
876
 
877
  # ── Constants ─────────────────────────────────────────────────────────────────
878
  MAX_IMAGE_SIZE_MB = 10
879
+ CONCURRENCY_LIMIT = 1 # 1 worker only — Qwen2-VL-2B fp32 uses ~9GB on CPU
880
  CACHE_SIZE = 50 # LRU cache entries
881
  CACHE_TTL = 3600 # 1 hour
882
 
 
886
  result_cache: Dict[str, dict] = {} # hash → {result, timestamp}
887
 
888
  # ── FastAPI App ───────────────────────────────────────────────────────────────
889
+ from contextlib import asynccontextmanager
890
+
891
+ @asynccontextmanager
892
+ async def lifespan(app: FastAPI):
893
+ """Pre-warm the VLM at container startup so first request isn't penalized."""
894
+ logger.info("=== Startup: pre-warming primary OCR model ===")
895
+ loop = asyncio.get_event_loop()
896
+ try:
897
+ await loop.run_in_executor(None, ocr_engine._load_primary)
898
+ logger.info("=== Startup: model ready | RSS=%.0f MB ===", _rss_mb())
899
+ except Exception as e:
900
+ logger.error("=== Startup: model pre-warm FAILED: %s ===", e)
901
+ yield # App runs here
902
+ logger.info("=== Shutdown: releasing model ===")
903
+ ocr_engine._unload_primary()
904
+ ocr_engine._unload_fallback()
905
+
906
  app = FastAPI(
907
  title="Smart Parchi OCR v7",
908
  description=(
 
910
  "Qaari-0.1 (Urdu Nastaliq) + GOT-OCR 2.0 fallback. No external APIs."
911
  ),
912
  version="7.0.0",
913
+ lifespan=lifespan,
914
  )
915
 
916
  app.add_middleware(