Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -569,7 +569,8 @@ PRIMARY_MODEL_ID = os.getenv("PRIMARY_MODEL_ID", "oddadmix/Qaari-0.1-Urdu-OCR-VL
|
|
| 569 |
FALLBACK_MODEL_ID = os.getenv("FALLBACK_MODEL_ID", "stepfun-ai/GOT-OCR-2.0-hf")
|
| 570 |
ENABLE_FALLBACK = os.getenv("ENABLE_FALLBACK", "1").strip() not in ("0", "false", "no")
|
| 571 |
VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))
|
| 572 |
-
|
|
|
|
| 573 |
VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT_SECONDS", "75"))
|
| 574 |
|
| 575 |
|
|
@@ -875,7 +876,7 @@ logger = logging.getLogger("parchi.app")
|
|
| 875 |
|
| 876 |
# ── Constants ─────────────────────────────────────────────────────────────────
|
| 877 |
MAX_IMAGE_SIZE_MB = 10
|
| 878 |
-
CONCURRENCY_LIMIT =
|
| 879 |
CACHE_SIZE = 50 # LRU cache entries
|
| 880 |
CACHE_TTL = 3600 # 1 hour
|
| 881 |
|
|
@@ -885,6 +886,23 @@ semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)
|
|
| 885 |
result_cache: Dict[str, dict] = {} # hash → {result, timestamp}
|
| 886 |
|
| 887 |
# ── FastAPI App ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
app = FastAPI(
|
| 889 |
title="Smart Parchi OCR v7",
|
| 890 |
description=(
|
|
@@ -892,6 +910,7 @@ app = FastAPI(
|
|
| 892 |
"Qaari-0.1 (Urdu Nastaliq) + GOT-OCR 2.0 fallback. No external APIs."
|
| 893 |
),
|
| 894 |
version="7.0.0",
|
|
|
|
| 895 |
)
|
| 896 |
|
| 897 |
app.add_middleware(
|
|
|
|
| 569 |
FALLBACK_MODEL_ID = os.getenv("FALLBACK_MODEL_ID", "stepfun-ai/GOT-OCR-2.0-hf")
|
| 570 |
ENABLE_FALLBACK = os.getenv("ENABLE_FALLBACK", "1").strip() not in ("0", "false", "no")
|
| 571 |
VLM_MEMORY_LIMIT_MB = float(os.getenv("VLM_MEMORY_LIMIT_MB", "12000"))
|
| 572 |
+
# 200 tokens is plenty for a grocery receipt (Qaari output was 68 chars)
|
| 573 |
+
VLM_MAX_TOKENS = int(os.getenv("VLM_MAX_NEW_TOKENS", "200"))
|
| 574 |
VLM_TIMEOUT = float(os.getenv("VLM_TIMEOUT_SECONDS", "75"))
|
| 575 |
|
| 576 |
|
|
|
|
| 876 |
|
| 877 |
# ── Constants ─────────────────────────────────────────────────────────────────
|
| 878 |
MAX_IMAGE_SIZE_MB = 10
|
| 879 |
+
CONCURRENCY_LIMIT = 1 # 1 worker only — Qwen2-VL-2B fp32 uses ~9GB on CPU
|
| 880 |
CACHE_SIZE = 50 # LRU cache entries
|
| 881 |
CACHE_TTL = 3600 # 1 hour
|
| 882 |
|
|
|
|
| 886 |
result_cache: Dict[str, dict] = {} # hash → {result, timestamp}
|
| 887 |
|
| 888 |
# ── FastAPI App ───────────────────────────────────────────────────────────────
|
| 889 |
+
from contextlib import asynccontextmanager
|
| 890 |
+
|
| 891 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Pre-warm the primary OCR model at startup and release models at shutdown.

    Startup: ``ocr_engine._load_primary`` is run in the event loop's default
    executor so the load happens off the event loop thread. If pre-warming
    raises, the failure is logged with its traceback and startup continues.

    Shutdown: ``ocr_engine._unload_primary`` and ``ocr_engine._unload_fallback``
    are both called, even if the application exits through an exception or
    the first unload raises.

    Args:
        app: The FastAPI application this lifespan is attached to. It is not
            used in the body.

    Yields:
        None. Control is handed to the running application.
    """
    logger.info("=== Startup: pre-warming primary OCR model ===")
    # Inside a coroutine, get_running_loop() is the supported way to obtain
    # the loop; get_event_loop() is a legacy API with deprecated fallbacks.
    loop = asyncio.get_running_loop()
    try:
        await loop.run_in_executor(None, ocr_engine._load_primary)
        logger.info("=== Startup: model ready | RSS=%.0f MB ===", _rss_mb())
    except Exception as e:
        # Best-effort pre-warm: keep serving, but record the traceback.
        # NOTE(review): presumably the engine retries loading on first
        # request when this fails -- confirm against ocr_engine.
        logger.exception("=== Startup: model pre-warm FAILED: %s ===", e)
    try:
        yield  # App runs here
    finally:
        # Cleanup must run even when an exception is thrown in at the yield.
        logger.info("=== Shutdown: releasing model ===")
        try:
            ocr_engine._unload_primary()
        finally:
            # Still release the fallback if unloading the primary raised.
            ocr_engine._unload_fallback()
|
| 905 |
+
|
| 906 |
app = FastAPI(
|
| 907 |
title="Smart Parchi OCR v7",
|
| 908 |
description=(
|
|
|
|
| 910 |
"Qaari-0.1 (Urdu Nastaliq) + GOT-OCR 2.0 fallback. No external APIs."
|
| 911 |
),
|
| 912 |
version="7.0.0",
|
| 913 |
+
lifespan=lifespan,
|
| 914 |
)
|
| 915 |
|
| 916 |
app.add_middleware(
|